Diffstat (limited to 'lib')
443 files changed, 23478 insertions, 20095 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 371dcaf..503fbbd 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -233,10 +233,12 @@ bool llvm::isNoAliasCall(const Value *V) {
 /// NoAlias returns
 ///
 bool llvm::isIdentifiedObject(const Value *V) {
-  if (isa<AllocaInst>(V) || isNoAliasCall(V))
+  if (isa<AllocaInst>(V))
     return true;
   if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
     return true;
+  if (isNoAliasCall(V))
+    return true;
   if (const Argument *A = dyn_cast<Argument>(V))
     return A->hasNoAliasAttr() || A->hasByValAttr();
   return false;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index bfa3ff1..37ee9fc 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index 88c2875..bc2d9c55 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -45,8 +45,12 @@ namespace {
       InitializeAliasAnalysis(this);  // set up super class
 
       for(Module::global_iterator I = M.global_begin(),
-          E = M.global_end(); I != E; ++I)
+          E = M.global_end(); I != E; ++I) {
         Vals.insert(&*I);
+        for (User::const_op_iterator OI = I->op_begin(),
+             OE = I->op_end(); OI != OE; ++OI)
+          Vals.insert(*OI);
+      }
 
       for(Module::iterator I = M.begin(),
           E = M.end(); I != E; ++I){
@@ -58,8 +62,12 @@ namespace {
         for (Function::const_iterator FI = I->begin(), FE = I->end();
              FI != FE; ++FI)
           for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
-               BI != BE; ++BI)
+               BI != BE; ++BI) {
             Vals.insert(&*BI);
+            for (User::const_op_iterator OI = BI->op_begin(),
+                 OE = BI->op_end(); OI != OE; ++OI)
+              Vals.insert(*OI);
+          }
       }
     }
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index cfe7a1c..4f53a6d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -78,6 +78,20 @@ static bool isNonEscapingLocalObject(const Value *V) {
   return false;
 }
 
+/// isEscapeSource - Return true if the pointer is one which would have
+/// been considered an escape by isNonEscapingLocalObject.
+static bool isEscapeSource(const Value *V) {
+  if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+    return true;
+
+  // The load case works because isNonEscapingLocalObject considers all
+  // stores to be escapes (it passes true for the StoreCaptures argument
+  // to PointerMayBeCaptured).
+  if (isa<LoadInst>(V))
+    return true;
+
+  return false;
+}
 
 /// isObjectSmallerThan - Return true if we can prove that the object specified
 /// by V is smaller than Size.
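[Editor's note] The isEscapeSource/isNonEscapingLocalObject pairing introduced above drives a new NoAlias rule later in this file: a pointer that arrives via a call, invoke, load, or argument cannot alias a local object whose address never escapes the function. Below is a minimal standalone C++ sketch of how the two predicates combine; all types and names here are toy stand-ins, not the LLVM classes.

    #include <cassert>

    enum class Kind { Alloca, CallResult, LoadResult, Argument, Global };

    struct Value {
      Kind kind;
      bool addressEscapes; // conservatively computed elsewhere
    };

    static bool isEscapeSource(const Value &v) {
      // Calls, loads, and arguments can only yield pointers that were
      // visible outside the current function at some point.
      return v.kind == Kind::CallResult || v.kind == Kind::LoadResult ||
             v.kind == Kind::Argument;
    }

    static bool isNonEscapingLocalObject(const Value &v) {
      return v.kind == Kind::Alloca && !v.addressEscapes;
    }

    enum class AliasResult { NoAlias, MayAlias };

    static AliasResult aliasCheck(const Value &a, const Value &b) {
      // The rule added by this commit: an escape source cannot point at a
      // local object that never escaped.
      if ((isEscapeSource(a) && isNonEscapingLocalObject(b)) ||
          (isEscapeSource(b) && isNonEscapingLocalObject(a)))
        return AliasResult::NoAlias;
      return AliasResult::MayAlias;
    }

    int main() {
      Value local{Kind::Alloca, /*addressEscapes=*/false};
      Value ret{Kind::CallResult, true};
      assert(aliasCheck(local, ret) == AliasResult::NoAlias);
    }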
@@ -94,7 +108,7 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size,
   } else if (const CallInst* CI = extractMallocCall(V)) {
     if (!isArrayMalloc(V, &TD))
       // The size is the argument to the malloc call.
-      if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
+      if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
         return (C->getZExtValue() < Size);
     return false;
   } else if (const Argument *A = dyn_cast<Argument>(V)) {
@@ -177,9 +191,29 @@ static RegisterAnalysisGroup<AliasAnalysis> V(U);
 ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
 
 //===----------------------------------------------------------------------===//
-// BasicAA Pass
+// BasicAliasAnalysis Pass
 //===----------------------------------------------------------------------===//
 
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+  if (const Instruction *inst = dyn_cast<Instruction>(V))
+    return inst->getParent()->getParent();
+
+  if (const Argument *arg = dyn_cast<Argument>(V))
+    return arg->getParent();
+
+  return NULL;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+  const Function *F1 = getParent(O1);
+  const Function *F2 = getParent(O2);
+
+  return !F1 || !F2 || F1 == F2;
+}
+#endif
+
 namespace {
   /// BasicAliasAnalysis - This is the default alias analysis implementation.
   /// Because it doesn't chain to a previous alias analysis (like -no-aa), it
@@ -187,11 +221,14 @@ namespace {
   struct BasicAliasAnalysis : public NoAA {
     static char ID; // Class identification, replacement for typeinfo
     BasicAliasAnalysis() : NoAA(&ID) {}
+
     AliasResult alias(const Value *V1, unsigned V1Size,
                       const Value *V2, unsigned V2Size) {
-      assert(VisitedPHIs.empty() && "VisitedPHIs must be cleared after use!");
+      assert(Visited.empty() && "Visited must be cleared after use!");
+      assert(notDifferentParent(V1, V2) &&
+             "BasicAliasAnalysis doesn't support interprocedural queries.");
       AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
-      VisitedPHIs.clear();
+      Visited.clear();
       return Alias;
     }
 
@@ -213,8 +250,8 @@ namespace {
     }
 
   private:
-    // VisitedPHIs - Track PHI nodes visited by a aliasCheck() call.
-    SmallPtrSet<const Value*, 16> VisitedPHIs;
+    // Visited - Track instructions visited by a aliasPHI, aliasSelect(), and aliasGEP().
+    SmallPtrSet<const Value*, 16> Visited;
 
     // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
     // instruction against another.
@@ -268,6 +305,9 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
 /// simple "address taken" analysis on local objects.
 AliasAnalysis::ModRefResult
 BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+  assert(notDifferentParent(CS.getInstruction(), P) &&
+         "AliasAnalysis query involving multiple functions!");
+
   const Value *Object = P->getUnderlyingObject();
 
   // If this is a tail call and P points to a stack location, we know that
@@ -318,10 +358,10 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
       case Intrinsic::memcpy:
       case Intrinsic::memmove: {
         unsigned Len = ~0U;
-        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
+        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
           Len = LenCI->getZExtValue();
-        Value *Dest = II->getOperand(1);
-        Value *Src = II->getOperand(2);
+        Value *Dest = II->getArgOperand(0);
+        Value *Src = II->getArgOperand(1);
         if (isNoAlias(Dest, Len, P, Size)) {
           if (isNoAlias(Src, Len, P, Size))
             return NoModRef;
@@ -332,9 +372,9 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
       case Intrinsic::memset:
         // Since memset is 'accesses arguments' only, the AliasAnalysis base class
        // will handle it for the variable length case.
-        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
+        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
           unsigned Len = LenCI->getZExtValue();
-          Value *Dest = II->getOperand(1);
+          Value *Dest = II->getArgOperand(0);
           if (isNoAlias(Dest, Len, P, Size))
             return NoModRef;
         }
@@ -352,7 +392,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
       case Intrinsic::atomic_load_umax:
       case Intrinsic::atomic_load_umin:
         if (TD) {
-          Value *Op1 = II->getOperand(1);
+          Value *Op1 = II->getArgOperand(0);
           unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
           if (isNoAlias(Op1, Op1Size, P, Size))
             return NoModRef;
@@ -361,14 +401,14 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
       case Intrinsic::lifetime_start:
       case Intrinsic::lifetime_end:
       case Intrinsic::invariant_start: {
-        unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-        if (isNoAlias(II->getOperand(2), PtrSize, P, Size))
+        unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+        if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
           return NoModRef;
         break;
       }
       case Intrinsic::invariant_end: {
-        unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-        if (isNoAlias(II->getOperand(3), PtrSize, P, Size))
+        unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+        if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
           return NoModRef;
         break;
       }
@@ -440,6 +480,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
                              const Value *V2, unsigned V2Size,
                              const Value *UnderlyingV1,
                              const Value *UnderlyingV2) {
+  // If this GEP has been visited before, we're on a use-def cycle.
+  // Such cycles are only valid when PHI nodes are involved or in unreachable
+  // code. The visitPHI function catches cycles containing PHIs, but there
+  // could still be a cycle without PHIs in unreachable code.
+  if (!Visited.insert(GEP1))
+    return MayAlias;
+
   int64_t GEP1BaseOffset;
   SmallVector<std::pair<const Value*, int64_t>, 4> GEP1VariableIndices;
 
@@ -550,6 +597,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
 AliasAnalysis::AliasResult
 BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
                                 const Value *V2, unsigned V2Size) {
+  // If this select has been visited before, we're on a use-def cycle.
+  // Such cycles are only valid when PHI nodes are involved or in unreachable
+  // code. The visitPHI function catches cycles containing PHIs, but there
+  // could still be a cycle without PHIs in unreachable code.
+  if (!Visited.insert(SI))
+    return MayAlias;
+
   // If the values are Selects with the same condition, we can do a more precise
   // check: just check for aliases between the values on corresponding arms.
   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -570,11 +624,17 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
   // If both arms of the Select node NoAlias or MustAlias V2, then returns
   // NoAlias / MustAlias. Otherwise, returns MayAlias.
   AliasResult Alias =
-    aliasCheck(SI->getTrueValue(), SISize, V2, V2Size);
+    aliasCheck(V2, V2Size, SI->getTrueValue(), SISize);
   if (Alias == MayAlias)
     return MayAlias;
+
+  // If V2 is visited, the recursive case will have been caught in the
+  // above aliasCheck call, so these subsequent calls to aliasCheck
+  // don't need to assume that V2 is being visited recursively.
+  Visited.erase(V2);
+
   AliasResult ThisAlias =
-    aliasCheck(SI->getFalseValue(), SISize, V2, V2Size);
+    aliasCheck(V2, V2Size, SI->getFalseValue(), SISize);
   if (ThisAlias != Alias)
     return MayAlias;
   return Alias;
@@ -586,7 +646,7 @@ AliasAnalysis::AliasResult
 BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
                              const Value *V2, unsigned V2Size) {
   // The PHI node has already been visited, avoid recursion any further.
-  if (!VisitedPHIs.insert(PN))
+  if (!Visited.insert(PN))
     return MayAlias;
 
   // If the values are PHIs in the same block, we can do a more precise
@@ -636,10 +696,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
   for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
     Value *V = V1Srcs[i];
 
-    // If V2 is a PHI, the recursive case will have been caught in the
+    // If V2 is visited, the recursive case will have been caught in the
     // above aliasCheck call, so these subsequent calls to aliasCheck
     // don't need to assume that V2 is being visited recursively.
-    VisitedPHIs.erase(V2);
+    Visited.erase(V2);
 
     AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize);
     if (ThisAlias != Alias || ThisAlias == MayAlias)
@@ -693,17 +753,32 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
       (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
     return NoAlias;
 
-  // Arguments can't alias with local allocations or noalias calls.
-  if ((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
-      (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1))))
+  // Arguments can't alias with local allocations or noalias calls
+  // in the same function.
+  if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+       (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
     return NoAlias;
 
   // Most objects can't alias null.
-  if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
-      (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
+  if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+      (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
     return NoAlias;
-  }
 
+  // If one pointer is the result of a call/invoke or load and the other is a
+  // non-escaping local object within the same function, then we know the
+  // object couldn't escape to a point where the call could return it.
+  //
+  // Note that if the pointers are in different functions, there are a
+  // variety of complications. A call with a nocapture argument may still
+  // temporary store the nocapture argument's value in a temporary memory
+  // location if that memory location doesn't escape. Or it may pass a
+  // nocapture value to other functions as long as they don't capture it.
+  if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+    return NoAlias;
+  if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+    return NoAlias;
+  }
 
   // If the size of one access is larger than the entire object on the other
   // side, then we know such behavior is undefined and can assume no alias.
   if (TD)
@@ -711,22 +786,6 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
         (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD)))
       return NoAlias;
 
-  // If one pointer is the result of a call/invoke or load and the other is a
-  // non-escaping local object, then we know the object couldn't escape to a
-  // point where the call could return it. The load case works because
-  // isNonEscapingLocalObject considers all stores to be escapes (it
-  // passes true for the StoreCaptures argument to PointerMayBeCaptured).
-  if (O1 != O2) {
-    if ((isa<CallInst>(O1) || isa<InvokeInst>(O1) || isa<LoadInst>(O1) ||
-         isa<Argument>(O1)) &&
-        isNonEscapingLocalObject(O2))
-      return NoAlias;
-    if ((isa<CallInst>(O2) || isa<InvokeInst>(O2) || isa<LoadInst>(O2) ||
-         isa<Argument>(O2)) &&
-        isNonEscapingLocalObject(O1))
-      return NoAlias;
-  }
-
   // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
   // GEP can't simplify, we don't even look at the PHI cases.
   if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
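[Editor's note] The Visited set threaded through aliasGEP, aliasSelect, and aliasPHI above exists to cut off recursion on use-def cycles, which can only occur via PHIs or in unreachable code. A self-contained C++ sketch of the guard, using toy node types rather than LLVM's classes:

    #include <set>
    #include <vector>

    struct Node {
      std::vector<const Node *> operands;
    };

    enum class AliasResult { MayAlias, NoAlias };

    static AliasResult aliasCheck(const Node *N,
                                  std::set<const Node *> &Visited) {
      // A second visit means we are on a cycle; give the conservative answer
      // instead of recursing forever.
      if (!Visited.insert(N).second)
        return AliasResult::MayAlias;
      for (const Node *Op : N->operands)
        if (aliasCheck(Op, Visited) == AliasResult::MayAlias)
          return AliasResult::MayAlias;
      return AliasResult::NoAlias;
    }

    int main() {
      Node a, b;
      a.operands = {&b};
      b.operands = {&a}; // a use-def cycle, as only unreachable IR contains
      std::set<const Node *> Visited;
      aliasCheck(&a, Visited); // terminates with MayAlias
    }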
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 5a37ce0..d9b670d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMAnalysis
   LibCallSemantics.cpp
   Lint.cpp
   LiveValues.cpp
+  Loads.cpp
   LoopDependenceAnalysis.cpp
   LoopInfo.cpp
   LoopPass.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 37cda02..13d8f4d 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -208,7 +208,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
        i != e; ++i, ++GTI) {
     ConstantInt *CI = dyn_cast<ConstantInt>(*i);
     if (!CI) return false;  // Index isn't a simple constant?
-    if (CI->getZExtValue() == 0) continue;  // Not adding anything.
+    if (CI->isZero()) continue;  // Not adding anything.
 
     if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
       // N = N + Offset
@@ -436,8 +436,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
       unsigned StrLen = Str.length();
       const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
       unsigned NumBits = Ty->getPrimitiveSizeInBits();
-      // Replace LI with immediate integer store.
-      if ((NumBits >> 3) == StrLen + 1) {
+      // Replace load with immediate integer if the result is an integer or fp
+      // value.
+      if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+          (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
         APInt StrVal(NumBits, 0);
         APInt SingleChar(NumBits, 0);
         if (TD->isLittleEndian()) {
@@ -454,7 +456,11 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
           SingleChar = 0;
           StrVal = (StrVal << 8) | SingleChar;
         }
-        return ConstantInt::get(CE->getContext(), StrVal);
+
+        Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+        if (Ty->isFloatingPointTy())
+          Res = ConstantExpr::getBitCast(Res, Ty);
+        return Res;
       }
     }
 
@@ -772,9 +778,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
   case Instruction::ICmp:
   case Instruction::FCmp: assert(0 && "Invalid for compares");
   case Instruction::Call:
-    if (Function *F = dyn_cast<Function>(Ops[0]))
+    if (Function *F = dyn_cast<Function>(Ops[CallInst::ArgOffset ? 0:NumOps-1]))
       if (canConstantFoldCallTo(F))
-        return ConstantFoldCall(F, Ops+1, NumOps-1);
+        return ConstantFoldCall(F, Ops+CallInst::ArgOffset, NumOps-1);
     return 0;
   case Instruction::PtrToInt:
     // If the input is a inttoptr, eliminate the pair.  This requires knowing
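[Editor's note] The ConstantFolding hunk above builds an APInt from a constant string one byte at a time, honoring host byte order. A hedged, plain-C++ model of the little-endian direction, with uint64_t standing in for APInt; the final assert only holds when the sketch itself runs on a little-endian host.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint64_t foldStringLoadLE(const char *Str, unsigned NumBytes) {
      uint64_t Val = 0;
      // Little-endian: the first byte of the string becomes the lowest byte.
      for (unsigned i = NumBytes; i-- != 0; )
        Val = (Val << 8) | static_cast<uint8_t>(Str[i]);
      return Val;
    }

    int main() {
      const char Str[8] = "abcdefg"; // 7 chars + NUL: NumBits/8 == StrLen + 1
      uint64_t Folded = foldStringLoadLE(Str, 8);
      uint64_t Loaded;
      std::memcpy(&Loaded, Str, 8); // what the load would see (LE host only)
      assert(Folded == Loaded);
    }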
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index a7b6d2b..c8d0d22 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -73,6 +73,15 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
   return 0;
 }
 
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+  return 0;
+}
+
 unsigned DIVariable::getNumAddrElements() const {
   return DbgNode->getNumOperands()-6;
 }
@@ -397,6 +406,8 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
 /// information for the function F.
 bool DISubprogram::describes(const Function *F) {
   assert(F && "Invalid function");
+  if (F == getFunction())
+    return true;
   StringRef Name = getLinkageName();
   if (Name.empty())
     Name = getName();
@@ -938,7 +949,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
                                          unsigned VK, unsigned VIndex,
                                          DIType ContainingType,
                                          bool isArtificial,
-                                         bool isOptimized) {
+                                         bool isOptimized,
+                                         Function *Fn) {
 
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
@@ -956,9 +968,15 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
     ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
     ContainingType,
     ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized)
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
   };
-  return DISubprogram(MDNode::get(VMContext, &Elts[0], 16));
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], 17);
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
 }
 
 /// CreateSubprogramDefinition - Create new subprogram descriptor for the
@@ -984,9 +1002,15 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration)
     DeclNode->getOperand(12), // VIndex
     DeclNode->getOperand(13), // Containting Type
     DeclNode->getOperand(14), // isArtificial
-    DeclNode->getOperand(15) // isOptimized
+    DeclNode->getOperand(15), // isOptimized
+    SPDeclaration.getFunction()
   };
-  return DISubprogram(MDNode::get(VMContext, &Elts[0], 16));
+  MDNode *Node =MDNode::get(VMContext, &Elts[0], 16);
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
 }
 
 /// CreateGlobalVariable - Create a new descriptor for the specified global.
@@ -1042,8 +1066,18 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
     // The optimizer may remove local variable. If there is an interest
     // to preserve variable info in such situation then stash it in a
     // named mdnode.
-    NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv");
-    NMD->addOperand(Node);
+    DISubprogram Fn(getDISubprogram(Context));
+    StringRef FName = "fn";
+    if (Fn.getFunction())
+      FName = Fn.getFunction()->getName();
+    char One = '\1';
+    if (FName.startswith(StringRef(&One, 1)))
+      FName = FName.substr(1);
+    NamedMDNode *FnLocals = M.getNamedMetadata(Twine("llvm.dbg.lv.", FName));
+    if (!FnLocals)
+      FnLocals = NamedMDNode::Create(VMContext, Twine("llvm.dbg.lv.", FName),
+                                     NULL, 0, &M);
+    FnLocals->addOperand(Node);
   }
   return DIVariable(Node);
 }
@@ -1110,18 +1144,6 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
   return DILocation(MDNode::get(VMContext, &Elts[0], 4));
 }
 
-/// CreateLocation - Creates a debug info location.
-DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
-                                     DIScope S, MDNode *OrigLoc) {
-  Value *Elts[] = {
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
-    S,
-    OrigLoc
-  };
-  return DILocation(MDNode::get(VMContext, &Elts[0], 4));
-}
-
 //===----------------------------------------------------------------------===//
 // DIFactory: Routines for inserting code into a function
 //===----------------------------------------------------------------------===//
@@ -1218,17 +1240,19 @@ void DebugInfoFinder::processModule(Module &M) {
             processLocation(DILocation(IA));
     }
 
-  NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
-  if (!NMD)
-    return;
-
-  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-    DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
-    if (addGlobalVariable(DIG)) {
-      addCompileUnit(DIG.getCompileUnit());
-      processType(DIG.getType());
+  if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
+      if (addGlobalVariable(DIG)) {
+        addCompileUnit(DIG.getCompileUnit());
+        processType(DIG.getType());
+      }
     }
   }
+
+  if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      processSubprogram(DISubprogram(NMD->getOperand(i)));
 }
 
 /// processLocation - Process DILocation.
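[Editor's note] The recurring pattern in these DebugInfo.cpp hunks is to root otherwise-unreferenced debug MDNodes in named metadata so optimizations cannot drop them. A short sketch of that pattern, using the same two calls the diff itself uses (getOrInsertNamedMetadata and addOperand); the wrapper function and its name are illustrative only.

    #include "llvm/Module.h"
    #include "llvm/Metadata.h"

    using namespace llvm;

    // Named metadata is a module-level root: anything reachable from it
    // stays alive even after every instruction-level reference is gone.
    static void preserveSubprogram(Module &M, MDNode *Node) {
      NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
      NMD->addOperand(Node);
    }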
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index a1676e5..d95c376 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -43,10 +43,10 @@ struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
 
     if (isSimple())
       return DOTGraphTraits<const Function*>
-        ::getSimpleNodeLabel(BB, BB->getParent());
+               ::getSimpleNodeLabel(BB, BB->getParent());
     else
       return DOTGraphTraits<const Function*>
-        ::getCompleteNodeLabel(BB, BB->getParent());
+               ::getCompleteNodeLabel(BB, BB->getParent());
   }
 };
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 2bde56d7..65c7c6e 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -126,13 +126,15 @@ private:
     }
 
     // Loop over all of the users of the function, looking for non-call uses.
-    for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I)
-      if ((!isa<CallInst>(I) && !isa<InvokeInst>(I))
-          || !CallSite(cast<Instruction>(I)).isCallee(I)) {
+    for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
+      User *U = *I;
+      if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+          || !CallSite(cast<Instruction>(U)).isCallee(I)) {
        // Not a call, or being used as a parameter rather than as the callee.
         ExternalCallingNode->addCalledFunction(CallSite(), Node);
         break;
       }
+    }
 
     // If this function is not defined in this translation unit, it could call
     // anything.
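[Editor's note] CallGraph.cpp above, and GlobalsModRef.cpp and LoopInfo.cpp below, all apply the same mechanical cleanup: dereference the use_iterator once into a User* and inspect that pointer, rather than re-dereferencing the iterator (or passing the iterator itself) in every isa<>/dyn_cast<> test. A toy C++ rendering of the shape of the loop, with stand-in types:

    #include <vector>

    struct User { int Kind; };

    static int countLoads(const std::vector<User *> &Uses) {
      int Loads = 0;
      for (std::vector<User *>::const_iterator I = Uses.begin(),
           E = Uses.end(); I != E; ++I) {
        User *U = *I; // one dereference; every check below uses U
        if (U->Kind == 0)
          ++Loads;
      }
      return Loads;
    }

    int main() {
      User Load = {0}, Store = {1};
      std::vector<User *> Uses;
      Uses.push_back(&Load);
      Uses.push_back(&Store);
      return countLoads(Uses) == 1 ? 0 : 1;
    }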
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index b14afa3..f13deea 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -233,33 +233,34 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
                                          GlobalValue *OkayStoreDest) {
   if (!V->getType()->isPointerTy()) return true;
 
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+  for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
+    User *U = *UI;
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       Readers.push_back(LI->getParent()->getParent());
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
       if (V == SI->getOperand(1)) {
         Writers.push_back(SI->getParent()->getParent());
       } else if (SI->getOperand(1) != OkayStoreDest) {
         return true;  // Storing the pointer
       }
-    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
       if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
-    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
         return true;
-    } else if (isFreeCall(*UI)) {
-      Writers.push_back(cast<Instruction>(*UI)->getParent()->getParent());
-    } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+    } else if (isFreeCall(U)) {
+      Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
+    } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
       // Make sure that this is just the function being called, not that it is
       // passing into the function.
-      for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
-        if (CI->getOperand(i) == V) return true;
-    } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+        if (CI->getArgOperand(i) == V) return true;
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
       // Make sure that this is just the function being called, not that it is
       // passing into the function.
-      for (unsigned i = 0, e = II->getNumOperands() - 3; i != e; ++i)
-        if (II->getOperand(i) == V) return true;
-    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+      for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+        if (II->getArgOperand(i) == V) return true;
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
       if (CE->getOpcode() == Instruction::GetElementPtr ||
           CE->getOpcode() == Instruction::BitCast) {
         if (AnalyzeUsesOfPointer(CE, Readers, Writers))
@@ -267,12 +268,14 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
       } else {
         return true;
       }
-    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
+    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
       if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
         return true;  // Allow comparison against null.
     } else {
       return true;
     }
+  }
+
   return false;
 }
 
@@ -291,7 +294,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
   // Walk the user list of the global. If we find anything other than a direct
   // load or store, bail out.
   for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
-    if (LoadInst *LI = dyn_cast<LoadInst>(*I)) {
+    User *U = *I;
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       // The pointer loaded from the global can only be used in simple ways:
       // we allow addressing of it and loading storing to it. We do *not* allow
       // storing the loaded pointer somewhere else or passing to a function.
@@ -299,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
       if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
         return false;  // Loaded pointer escapes.
       // TODO: Could try some IP mod/ref of the loaded pointer.
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(*I)) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
       // Storing the global itself.
       if (SI->getOperand(0) == GV) return false;
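[Editor's note] The GlobalsModRef hunks above also switch argument scans from raw operand indices to getArgOperand/getNumArgOperands, the migration applied throughout this commit to hide where the callee lives in a call's operand list. A toy sketch of the index mapping being relied on; the "callee at operand 0" layout here is an assumption chosen for illustration, not a claim about LLVM's layout.

    #include <cassert>
    #include <vector>

    struct ToyCall {
      // Assumed layout for this sketch: operand 0 is the callee, the
      // arguments follow.
      std::vector<int> Operands;
      int getOperand(unsigned i) const { return Operands[i]; }
      int getArgOperand(unsigned i) const { return Operands[i + 1]; }
      unsigned getNumArgOperands() const { return Operands.size() - 1; }
    };

    int main() {
      ToyCall CI{{/*callee*/ 99, /*args*/ 1, 2, 3}};
      // Callers written against getArgOperand(0) keep working even if the
      // callee is later moved elsewhere in the operand list.
      assert(CI.getArgOperand(0) == 1 && CI.getOperand(1) == 1);
      assert(CI.getNumArgOperands() == 3);
    }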
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 98dbb69..b1df517 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -162,14 +162,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
       if (Function *F = CS.getCalledFunction()) {
         if (F->isDeclaration() &&
             (F->getName() == "setjmp" || F->getName() == "_setjmp"))
-          NeverInline = true;
+          callsSetJmp = true;
 
         // If this call is to function itself, then the function is recursive.
         // Inlining it into other functions is a bad idea, because this is
         // basically just a form of loop peeling, and our metrics aren't useful
         // for that case.
         if (F == BB->getParent())
-          NeverInline = true;
+          isRecursive = true;
       }
 
       if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
@@ -220,7 +220,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
   // jump would jump from the inlined copy of the function into the original
   // function which is extremely undefined behavior.
   if (isa<IndirectBrInst>(BB->getTerminator()))
-    NeverInline = true;
+    containsIndirectBr = true;
 
   // Remember NumInsts for this BB.
   NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
@@ -247,7 +247,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
 
   // Don't bother calculating argument weights if we are never going to inline
   // the function anyway.
-  if (Metrics.NeverInline)
+  if (NeverInline())
     return;
 
   // Check out all of the arguments to the function, figuring out how much
@@ -258,6 +258,14 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
                                       CountCodeReductionForAlloca(I)));
 }
 
+/// NeverInline - returns true if the function should never be inlined into
+/// any caller
+bool InlineCostAnalyzer::FunctionInfo::NeverInline()
+{
+  return (Metrics.callsSetJmp || Metrics.isRecursive ||
+          Metrics.containsIndirectBr);
+
+}
 // getInlineCost - The heuristic used to determine if we should inline the
 // function call or not.
 //
@@ -315,7 +323,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
     CalleeFI->analyzeFunction(Callee);
 
   // If we should never inline this, return a huge cost.
-  if (CalleeFI->Metrics.NeverInline)
+  if (CalleeFI->NeverInline())
     return InlineCost::getNever();
 
   // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
@@ -443,10 +451,15 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
   }
 
   // Since CalleeMetrics were already calculated, we know that the CallerMetrics
-  // reference isn't invalidated: both were in the DenseMap.
-  CallerMetrics.NeverInline |= CalleeMetrics.NeverInline;
+  // reference isn't invalidated: both were in the DenseMap.
   CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
 
+  // FIXME: If any of these three are true for the callee, the callee was
+  // not inlined into the caller, so I think they're redundant here.
+  CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
+  CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
+  CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
+
   CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
   CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
   CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
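[Editor's note] The InlineCost.cpp change above replaces the single opaque NeverInline bit with three specific facts and derives inlinability from them. A minimal standalone sketch of that refactor, using a toy metrics struct rather than the real CodeMetrics:

    #include <cassert>

    struct Metrics {
      bool callsSetJmp = false;
      bool isRecursive = false;
      bool containsIndirectBr = false;
    };

    // Inlinability is now a derived property, so each underlying reason
    // stays visible to other heuristics.
    static bool neverInline(const Metrics &M) {
      return M.callsSetJmp || M.isRecursive || M.containsIndirectBr;
    }

    int main() {
      Metrics M;
      assert(!neverInline(M));
      M.containsIndirectBr = true; // e.g. the function uses indirectbr
      assert(neverInline(M));
    }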
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index a031cbc..9f1b30d 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -19,7 +19,8 @@
 //
 // Another limitation is that it assumes all code will be executed. A store
 // through a null pointer in a basic block which is never reached is harmless,
-// but this pass will warn about it anyway.
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
 //
 // Optimization passes may make conditions that this pass checks for more or
 // less obvious. If an optimization pass appears to be introducing a warning,
@@ -35,7 +36,11 @@
 
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Target/TargetData.h"
@@ -64,7 +69,8 @@ namespace {
     void visitFunction(Function &F);
 
     void visitCallSite(CallSite CS);
-    void visitMemoryReference(Instruction &I, Value *Ptr, unsigned Align,
+    void visitMemoryReference(Instruction &I, Value *Ptr,
+                              unsigned Size, unsigned Align,
                               const Type *Ty, unsigned Flags);
 
     void visitCallInst(CallInst &I);
@@ -88,9 +94,14 @@ namespace {
     void visitInsertElementInst(InsertElementInst &I);
     void visitUnreachableInst(UnreachableInst &I);
 
+    Value *findValue(Value *V, bool OffsetOk) const;
+    Value *findValueImpl(Value *V, bool OffsetOk,
+                         SmallPtrSet<Value *, 4> &Visited) const;
+
   public:
     Module *Mod;
     AliasAnalysis *AA;
+    DominatorTree *DT;
     TargetData *TD;
 
     std::string Messages;
@@ -104,6 +115,7 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
       AU.addRequired<AliasAnalysis>();
+      AU.addRequired<DominatorTree>();
     }
     virtual void print(raw_ostream &O, const Module *M) const {}
 
@@ -176,6 +188,7 @@ X("lint", "Statically lint-checks LLVM IR", false, true);
 bool Lint::runOnFunction(Function &F) {
   Mod = F.getParent();
   AA = &getAnalysis<AliasAnalysis>();
+  DT = &getAnalysis<DominatorTree>();
   TD = getAnalysisIfAvailable<TargetData>();
   visit(F);
   dbgs() << MessagesStr.str();
@@ -188,15 +201,17 @@ void Lint::visitFunction(Function &F) {
   // fairly common mistake to neglect to name a function.
   Assert1(F.hasName() || F.hasLocalLinkage(),
           "Unusual: Unnamed function with non-local linkage", &F);
+
+  // TODO: Check for irreducible control flow.
 }
 
 void Lint::visitCallSite(CallSite CS) {
   Instruction &I = *CS.getInstruction();
   Value *Callee = CS.getCalledValue();
 
-  visitMemoryReference(I, Callee, 0, 0, MemRef::Callee);
+  visitMemoryReference(I, Callee, ~0u, 0, 0, MemRef::Callee);
 
-  if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
+  if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
     Assert1(CS.getCallingConv() == F->getCallingConv(),
             "Undefined behavior: Caller and callee calling convention differ",
             &I);
@@ -209,23 +224,53 @@ void Lint::visitCallSite(CallSite CS) {
             FT->getNumParams() == NumActualArgs,
             "Undefined behavior: Call argument count mismatches callee "
             "argument count", &I);
-
-    // TODO: Check argument types (in case the callee was casted)
-
-    // TODO: Check ABI-significant attributes.
-    // TODO: Check noalias attribute.
-    // TODO: Check sret attribute.
+    Assert1(FT->getReturnType() == I.getType(),
+            "Undefined behavior: Call return type mismatches "
+            "callee return type", &I);
+
+    // Check argument types (in case the callee was casted) and attributes.
+    // TODO: Verify that caller and callee attributes are compatible.
+    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+    for (; AI != AE; ++AI) {
+      Value *Actual = *AI;
+      if (PI != PE) {
+        Argument *Formal = PI++;
+        Assert1(Formal->getType() == Actual->getType(),
+                "Undefined behavior: Call argument type mismatches "
+                "callee parameter type", &I);
+
+        // Check that noalias arguments don't alias other arguments. The
+        // AliasAnalysis API isn't expressive enough for what we really want
+        // to do. Known partial overlap is not distinguished from the case
+        // where nothing is known.
+        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+            Assert1(AI == BI ||
+                    AA->alias(*AI, ~0u, *BI, ~0u) != AliasAnalysis::MustAlias,
+                    "Unusual: noalias argument aliases another argument", &I);
+          }
+
+        // Check that an sret argument points to valid memory.
+        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+          const Type *Ty =
+            cast<PointerType>(Formal->getType())->getElementType();
+          visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
+                               TD ? TD->getABITypeAlignment(Ty) : 0,
+                               Ty, MemRef::Read | MemRef::Write);
+        }
+      }
+    }
   }
 
   if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
     for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
          AI != AE; ++AI) {
-      Value *Obj = (*AI)->getUnderlyingObject();
-      Assert1(!isa<AllocaInst>(Obj) && !isa<VAArgInst>(Obj),
+      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+      Assert1(!isa<AllocaInst>(Obj),
               "Undefined behavior: Call with \"tail\" keyword references "
-              "alloca or va_arg", &I);
+              "alloca", &I);
     }
 
@@ -237,9 +282,10 @@ void Lint::visitCallSite(CallSite CS) {
     case Intrinsic::memcpy: {
       MemCpyInst *MCI = cast<MemCpyInst>(&I);
-      visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0,
+      // TODO: If the size is known, use it.
+      visitMemoryReference(I, MCI->getDest(), ~0u, MCI->getAlignment(), 0,
                            MemRef::Write);
-      visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0,
+      visitMemoryReference(I, MCI->getSource(), ~0u, MCI->getAlignment(), 0,
                            MemRef::Read);
 
       // Check that the memcpy arguments don't overlap. The AliasAnalysis API
       // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
       unsigned Size = 0;
       if (const ConstantInt *Len =
-            dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
+            dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+                                            /*OffsetOk=*/false)))
         if (Len->getValue().isIntN(32))
           Size = Len->getValue().getZExtValue();
       Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
               AliasAnalysis::MustAlias,
               "Undefined behavior: memcpy source and destination overlap", &I);
       break;
     }
     case Intrinsic::memmove: {
       MemMoveInst *MMI = cast<MemMoveInst>(&I);
-      visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0,
+      // TODO: If the size is known, use it.
+      visitMemoryReference(I, MMI->getDest(), ~0u, MMI->getAlignment(), 0,
                            MemRef::Write);
-      visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0,
+      visitMemoryReference(I, MMI->getSource(), ~0u, MMI->getAlignment(), 0,
                            MemRef::Read);
       break;
     }
     case Intrinsic::memset: {
       MemSetInst *MSI = cast<MemSetInst>(&I);
-      visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0,
+      // TODO: If the size is known, use it.
+      visitMemoryReference(I, MSI->getDest(), ~0u, MSI->getAlignment(), 0,
                            MemRef::Write);
       break;
     }
 
     case Intrinsic::vastart:
       Assert1(I.getParent()->getParent()->isVarArg(),
               "Undefined behavior: va_start called in a non-varargs function",
               &I);
 
-      visitMemoryReference(I, CS.getArgument(0), 0, 0,
+      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
                            MemRef::Read | MemRef::Write);
       break;
     case Intrinsic::vacopy:
-      visitMemoryReference(I, CS.getArgument(0), 0, 0, MemRef::Write);
-      visitMemoryReference(I, CS.getArgument(1), 0, 0, MemRef::Read);
+      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Write);
+      visitMemoryReference(I, CS.getArgument(1), ~0u, 0, 0, MemRef::Read);
       break;
     case Intrinsic::vaend:
-      visitMemoryReference(I, CS.getArgument(0), 0, 0,
+      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
                            MemRef::Read | MemRef::Write);
       break;
 
     case Intrinsic::stackrestore:
       // Stackrestore doesn't read or write memory, but it sets the
       // stack pointer, which the compiler may read from or write to
       // at any time, so check it for both readability and writeability.
-      visitMemoryReference(I, CS.getArgument(0), 0, 0,
+      visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
                            MemRef::Read | MemRef::Write);
       break;
     }
@@ -310,17 +359,35 @@ void Lint::visitReturnInst(ReturnInst &I) {
   Assert1(!F->doesNotReturn(),
           "Unusual: Return statement in function with noreturn attribute",
           &I);
+
+  if (Value *V = I.getReturnValue()) {
+    Value *Obj = findValue(V, /*OffsetOk=*/true);
+    Assert1(!isa<AllocaInst>(Obj),
+            "Unusual: Returning alloca value", &I);
+  }
 }
 
-// TODO: Add a length argument and check that the reference is in bounds
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
 void Lint::visitMemoryReference(Instruction &I,
-                                Value *Ptr, unsigned Align, const Type *Ty,
-                                unsigned Flags) {
-  Value *UnderlyingObject = Ptr->getUnderlyingObject();
+                                Value *Ptr, unsigned Size, unsigned Align,
+                                const Type *Ty, unsigned Flags) {
+  // If no memory is being referenced, it doesn't matter if the pointer
+  // is valid.
+  if (Size == 0)
+    return;
+
+  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
   Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
           "Undefined behavior: Null pointer dereference", &I);
   Assert1(!isa<UndefValue>(UnderlyingObject),
           "Undefined behavior: Undef pointer dereference", &I);
+  Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+          !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+          "Unusual: All-ones pointer dereference", &I);
+  Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+          !cast<ConstantInt>(UnderlyingObject)->isOne(),
+          "Unusual: Address one pointer dereference", &I);
 
   if (Flags & MemRef::Write) {
     if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
@@ -361,13 +428,16 @@ void Lint::visitMemoryReference(Instruction &I,
 }
 
 void Lint::visitLoadInst(LoadInst &I) {
-  visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType(),
-                       MemRef::Read);
+  visitMemoryReference(I, I.getPointerOperand(),
+                       AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+                       I.getType(), MemRef::Read);
 }
 
 void Lint::visitStoreInst(StoreInst &I) {
-  visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(),
-                       I.getOperand(0)->getType(), MemRef::Write);
+  visitMemoryReference(I, I.getPointerOperand(),
+                       AA->getTypeStoreSize(I.getOperand(0)->getType()),
+                       I.getAlignment(),
+                       I.getOperand(0)->getType(), MemRef::Write);
 }
 
 void Lint::visitXor(BinaryOperator &I) {
@@ -384,21 +454,21 @@ void Lint::visitSub(BinaryOperator &I) {
 
 void Lint::visitLShr(BinaryOperator &I) {
   if (ConstantInt *CI =
-        dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
     Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
             "Undefined result: Shift count out of range", &I);
 }
 
 void Lint::visitAShr(BinaryOperator &I) {
   if (ConstantInt *CI =
-        dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
     Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
             "Undefined result: Shift count out of range", &I);
 }
 
 void Lint::visitShl(BinaryOperator &I) {
   if (ConstantInt *CI =
-        dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
     Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
             "Undefined result: Shift count out of range", &I);
 }
@@ -439,27 +509,31 @@ void Lint::visitAllocaInst(AllocaInst &I) {
   // This isn't undefined behavior, it's just an obvious pessimization.
   Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
           "Pessimization: Static alloca outside of entry block", &I);
+
+  // TODO: Check for an unusual size (MSB set?)
 }
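[Editor's note] Besides null and undef, visitMemoryReference above now flags dereferences of the constant addresses -1 and 1, which almost always indicate broken or hand-mangled IR. A toy version of that address screen, with plain integers standing in for ConstantInt pointers:

    #include <cassert>
    #include <cstdint>

    static bool isSuspiciousConstantAddress(uint64_t Addr, unsigned PtrBits) {
      uint64_t AllOnes = PtrBits == 64 ? ~0ULL : ((1ULL << PtrBits) - 1);
      // Null, address one, and all-ones are the addresses Lint warns about.
      return Addr == 0 || Addr == 1 || Addr == AllOnes;
    }

    int main() {
      assert(isSuspiciousConstantAddress(0, 64));
      assert(isSuspiciousConstantAddress(1, 64));
      assert(isSuspiciousConstantAddress(~0ULL, 64));
      assert(!isSuspiciousConstantAddress(0x1000, 64));
    }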
 
 void Lint::visitVAArgInst(VAArgInst &I) {
-  visitMemoryReference(I, I.getOperand(0), 0, 0,
+  visitMemoryReference(I, I.getOperand(0), ~0u, 0, 0,
                        MemRef::Read | MemRef::Write);
 }
 
 void Lint::visitIndirectBrInst(IndirectBrInst &I) {
-  visitMemoryReference(I, I.getAddress(), 0, 0, MemRef::Branchee);
+  visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee);
 }
 
 void Lint::visitExtractElementInst(ExtractElementInst &I) {
   if (ConstantInt *CI =
-        dyn_cast<ConstantInt>(I.getIndexOperand()->stripPointerCasts()))
+        dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+                                        /*OffsetOk=*/false)))
     Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
             "Undefined result: extractelement index out of range", &I);
 }
 
 void Lint::visitInsertElementInst(InsertElementInst &I) {
   if (ConstantInt *CI =
-        dyn_cast<ConstantInt>(I.getOperand(2)->stripPointerCasts()))
+        dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+                                        /*OffsetOk=*/false)))
     Assert1(CI->getValue().ult(I.getType()->getNumElements()),
             "Undefined result: insertelement index out of range", &I);
 }
@@ -472,6 +546,91 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
           "side effects", &I);
 }
 
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+  SmallPtrSet<Value *, 4> Visited;
+  return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+                           SmallPtrSet<Value *, 4> &Visited) const {
+  // Detect self-referential values.
+  if (!Visited.insert(V))
+    return UndefValue::get(V->getType());
+
+  // TODO: Look through sext or zext cast, when the result is known to
+  // be interpreted as signed or unsigned, respectively.
+  // TODO: Look through eliminable cast pairs.
+  // TODO: Look through calls with unique return values.
+  // TODO: Look through vector insert/extract/shuffle.
+  V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts();
+  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+    BasicBlock::iterator BBI = L;
+    BasicBlock *BB = L->getParent();
+    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+    for (;;) {
+      if (!VisitedBlocks.insert(BB)) break;
+      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
+                                              BB, BBI, 6, AA))
+        return findValueImpl(U, OffsetOk, Visited);
+      if (BBI != BB->begin()) break;
+      BB = BB->getUniquePredecessor();
+      if (!BB) break;
+      BBI = BB->end();
+    }
+  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    if (Value *W = PN->hasConstantValue(DT))
+      return findValueImpl(W, OffsetOk, Visited);
+  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+    if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
+                            Type::getInt64Ty(V->getContext())))
+      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+    if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+                                     Ex->idx_begin(),
+                                     Ex->idx_end()))
+      if (W != V)
+        return findValueImpl(W, OffsetOk, Visited);
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    // Same as above, but for ConstantExpr instead of Instruction.
+    if (Instruction::isCast(CE->getOpcode())) {
+      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+                               CE->getOperand(0)->getType(),
+                               CE->getType(),
+                               TD ? TD->getIntPtrType(V->getContext()) :
+                                    Type::getInt64Ty(V->getContext())))
+        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+    } else if (CE->getOpcode() == Instruction::ExtractValue) {
+      const SmallVector<unsigned, 4> &Indices = CE->getIndices();
+      if (Value *W = FindInsertedValue(CE->getOperand(0),
+                                       Indices.begin(),
+                                       Indices.end()))
+        if (W != V)
+          return findValueImpl(W, OffsetOk, Visited);
+    }
+  }
+
+  // As a last resort, try SimplifyInstruction or constant folding.
+  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+    if (Value *W = SimplifyInstruction(Inst, TD))
+      if (W != Inst)
+        return findValueImpl(W, OffsetOk, Visited);
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (Value *W = ConstantFoldConstantExpression(CE, TD))
+      if (W != V)
+        return findValueImpl(W, OffsetOk, Visited);
+  }
+
+  return V;
+}
+
 //===----------------------------------------------------------------------===//
 // Implement the public interfaces to this file...
 //===----------------------------------------------------------------------===//
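[Editor's note] findValue's load case leans on FindAvailableLoadedValue (defined in the new Loads.cpp below) to see constants through memory even in unoptimized IR. A toy store-to-load forwarding model of the idea, with a flat map standing in for the backward basic-block scan; all names here are illustrative:

    #include <cassert>
    #include <map>

    typedef int Address;
    typedef long ToyValue;

    static std::map<Address, ToyValue> Memory; // last stored value per address

    static void store(Address A, ToyValue V) { Memory[A] = V; }

    static bool findAvailableLoadedValue(Address A, ToyValue &Out) {
      std::map<Address, ToyValue>::iterator I = Memory.find(A);
      if (I == Memory.end())
        return false; // nothing seen; a real load would be needed
      Out = I->second; // forward the stored value, skipping the load
      return true;
    }

    int main() {
      store(42, 7);
      ToyValue V;
      assert(findAvailableLoadedValue(42, V) && V == 7);
    }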
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
new file mode 100644
index 0000000..2ba1d86
--- /dev/null
+++ b/lib/Analysis/Loads.cpp
@@ -0,0 +1,235 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+///   %t0 = getelementptr \@a, 0, 3
+///   store i32 0, i32* %t0
+///   %t1 = getelementptr \@a, 0, 3
+///   %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+  // Test if the values are trivially equivalent.
+  if (A == B) return true;
+
+  // Test if the values come from identical arithmetic instructions.
+  // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+  // this function is only used when one address use dominates the
+  // other, which means that they'll always either have the same
+  // value or one of them will have an undefined value.
+  if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+      isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+    if (const Instruction *BI = dyn_cast<Instruction>(B))
+      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+        return true;
+
+  // Otherwise they may not be equivalent.
+  return false;
+}
+
+/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
+/// bitcasts to get back to the underlying object being addressed, keeping
+/// track of the offset in bytes from the GEPs relative to the result.
+/// This is closely related to Value::getUnderlyingObject but is located
+/// here to avoid making VMCore depend on TargetData.
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+                                            uint64_t &ByteOffset,
+                                            unsigned MaxLookup = 6) {
+  if (!V->getType()->isPointerTy())
+    return V;
+  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      if (!GEP->hasAllConstantIndices())
+        return V;
+      SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
+      ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
+                                         &Indices[0], Indices.size());
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        return V;
+      V = GA->getAliasee();
+    } else {
+      return V;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  }
+  return V;
+}
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+                                       unsigned Align, const TargetData *TD) {
+  uint64_t ByteOffset = 0;
+  Value *Base = V;
+  if (TD)
+    Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+
+  const Type *BaseType = 0;
+  unsigned BaseAlign = 0;
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+    // An alloca is safe to load from as load as it is suitably aligned.
+    BaseType = AI->getAllocatedType();
+    BaseAlign = AI->getAlignment();
+  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+    // Global variables are safe to load from but their size cannot be
+    // guaranteed if they are overridden.
+    if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+      BaseType = GV->getType()->getElementType();
+      BaseAlign = GV->getAlignment();
+    }
+  }
+
+  if (BaseType && BaseType->isSized()) {
+    if (TD && BaseAlign == 0)
+      BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+    if (Align <= BaseAlign) {
+      if (!TD)
+        return true; // Loading directly from an alloca or global is OK.
+
+      // Check if the load is within the bounds of the underlying object.
+      const PointerType *AddrTy = cast<PointerType>(V->getType());
+      uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+      if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+          (Align == 0 || (ByteOffset % Align) == 0))
+        return true;
+    }
+  }
+
+  // Otherwise, be a little bit aggressive by scanning the local block where we
+  // want to check to see if the pointer is already being loaded or stored
+  // from/to. If so, the previous load or store would have already trapped,
+  // so there is no harm doing an extra load (also, CSE will later eliminate
+  // the load entirely).
+  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+  while (BBI != E) {
+    --BBI;
+
+    // If we see a free or a call which may write to memory (i.e. which might do
+    // a free) the pointer could be marked invalid.
+    if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+        !isa<DbgInfoIntrinsic>(BBI))
+      return false;
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+      if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+      if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+    }
+  }
+  return false;
+}
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+/// If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't find
+/// something that invalidates *Ptr or provides it, ScanFrom would be left at
+/// begin() and this returns null. ScanFrom could also be left
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+                                      BasicBlock::iterator &ScanFrom,
+                                      unsigned MaxInstsToScan,
+                                      AliasAnalysis *AA) {
+  if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+  // If we're using alias analysis to disambiguate get the size of *Ptr.
+  unsigned AccessSize = 0;
+  if (AA) {
+    const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+    AccessSize = AA->getTypeStoreSize(AccessTy);
+  }
+
+  while (ScanFrom != ScanBB->begin()) {
+    // We must ignore debug info directives when counting (otherwise they
+    // would affect codegen).
+    Instruction *Inst = --ScanFrom;
+    if (isa<DbgInfoIntrinsic>(Inst))
+      continue;
+
+    // Restore ScanFrom to expected value in case next test succeeds
+    ScanFrom++;
+
+    // Don't scan huge blocks.
+    if (MaxInstsToScan-- == 0) return 0;
+
+    --ScanFrom;
+    // If this is a load of Ptr, the loaded value is available.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+      if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+        return LI;
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      // If this is a store through Ptr, the value is available!
+      if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+        return SI->getOperand(0);
+
+      // If Ptr is an alloca and this is a store to a different alloca, ignore
+      // the store. This is a trivial form of alias analysis that is important
+      // for reg2mem'd code.
+      if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+          (isa<AllocaInst>(SI->getOperand(1)) ||
+           isa<GlobalVariable>(SI->getOperand(1))))
+        continue;
+
+      // If we have alias analysis and it says the store won't modify the loaded
+      // value, ignore the store.
+      if (AA &&
+          (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+        continue;
+
+      // Otherwise the store that may or may not alias the pointer, bail out.
+      ++ScanFrom;
+      return 0;
+    }
+
+    // If this is some other instruction that may clobber Ptr, bail out.
+    if (Inst->mayWriteToMemory()) {
+      // If alias analysis claims that it really won't modify the load,
+      // ignore it.
+      if (AA &&
+          (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+        continue;
+
+      // May modify the pointer, bail out.
+      ++ScanFrom;
+      return 0;
+    }
+  }
+
+  // Got to the start of the block, we didn't find it, but are done for this
+  // block.
+  return 0;
+}
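[Editor's note] A hedged usage sketch for the new Loads.cpp entry point, following the FindAvailableLoadedValue signature visible in the diff; the wrapper function and its name are illustrative only.

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/Loads.h"
    #include "llvm/Instructions.h"

    using namespace llvm;

    // Try to replace a load with a value already available in its block.
    static Value *tryForwardLoad(LoadInst *LI, AliasAnalysis *AA) {
      BasicBlock::iterator ScanFrom = LI;
      // Scan backwards at most 6 instructions for an earlier load of, or
      // store to, the same address; returns the available value or null.
      return FindAvailableLoadedValue(LI->getPointerOperand(),
                                      LI->getParent(), ScanFrom, 6, AA);
    }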
const FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) - return false; + return 0; if (FTy->getNumParams() != 1) - return false; + return 0; if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext())) - return false; + return 0; - return true; + return CI; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 2aa2f17..1f54d74 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -116,8 +116,8 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { Pointer = V->getOperand(0); PointerSize = AA->getTypeStoreSize(V->getType()); - } else if (isFreeCall(Inst)) { - Pointer = Inst->getOperand(1); + } else if (const CallInst *CI = isFreeCall(Inst)) { + Pointer = CI->getArgOperand(0); // calls to free() erase the entire structure PointerSize = ~0ULL; } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) { @@ -197,9 +197,9 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U); + AA->alias(II->getArgOperand(2), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) { - InvariantTag = II->getOperand(1); + InvariantTag = II->getArgOperand(0); continue; } @@ -210,7 +210,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U); + AA->alias(II->getArgOperand(1), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(II); } @@ -365,25 +365,26 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { MemPtr = LI->getPointerOperand(); MemSize = AA->getTypeStoreSize(LI->getType()); } - } else if (isFreeCall(QueryInst)) { - MemPtr = QueryInst->getOperand(1); + } else if (const CallInst *CI = isFreeCall(QueryInst)) { + MemPtr = CI->getArgOperand(0); // calls to free() erase the entire structure, not just a field. MemSize = ~0UL; } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { int IntrinsicID = 0; // Intrinsic IDs start at 1. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) + IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst); + if (II) IntrinsicID = II->getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: - MemPtr = QueryInst->getOperand(2); - MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue(); + MemPtr = II->getArgOperand(1); + MemSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); break; case Intrinsic::invariant_end: - MemPtr = QueryInst->getOperand(3); - MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue(); + MemPtr = II->getArgOperand(2); + MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); break; default: CallSite QueryCS = CallSite::get(QueryInst); @@ -456,7 +457,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // Okay, we have a cache entry. If we know it is not dirty, just return it // with no computation. 
if (!CacheP.second) { - NumCacheNonLocal++; + ++NumCacheNonLocal; return Cache; } @@ -478,7 +479,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { BasicBlock *QueryBB = QueryCS.getInstruction()->getParent(); for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI) DirtyBlocks.push_back(*PI); - NumUncacheNonLocal++; + ++NumUncacheNonLocal; } // isReadonlyCall - If this is a read-only call, we can be more aggressive. diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index f0f3a05..7354afa 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -67,10 +67,11 @@ PostDominanceFrontier::calculate(const PostDominatorTree &DT, if (BB) for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) { + BasicBlock *P = *SI; // Does Node immediately dominate this predecessor? - DomTreeNode *SINode = DT[*SI]; + DomTreeNode *SINode = DT[P]; if (SINode && SINode->getIDom() != Node) - S.insert(*SI); + S.insert(P); } // At this point, S is DFlocal. Now we union in DFup's of our children... diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 662576e..38dcd25 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -577,8 +577,6 @@ static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::s template<> bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) { - bool hasNoSuccessors = false; - double inWeight = 0; std::set<Edge> inMissing; std::set<const BasicBlock*> ProcessedPreds; @@ -596,10 +594,8 @@ bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *B std::set<Edge> outMissing; std::set<const BasicBlock*> ProcessedSuccs; succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); - if (sbbi == sbbe) { + if (sbbi == sbbe) readEdge(this,getEdge(BB,0),outWeight,outMissing); - hasNoSuccessors = true; - } for ( ; sbbi != sbbe; ++sbbi ) { if (ProcessedSuccs.insert(*sbbi).second) { readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 6870268..413b3b4 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -822,7 +822,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) return getConstant( - cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty))); + cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), + getEffectiveSCEVType(Ty)))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) @@ -844,9 +845,9 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return getAddRecExpr(Operands, AddRec->getLoop()); } - // The cast wasn't folded; create an explicit cast node. - // Recompute the insert position, as it may have been invalidated. - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // The cast wasn't folded; create an explicit cast node. We can reuse + // the existing insert position since if we get here, we won't have + // made any changes which would invalidate it. SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); @@ -862,12 +863,10 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. 
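  // For example (sketch): zero-extending a constant i8 42 to i64 now folds
  // in one step,
  //
  //   getZeroExtendExpr(getConstant(i8 42), i64) == getConstant(i64 42)
  //
  // The IntToPtr special case below is dropped because getEffectiveSCEVType
  // already maps pointer types to their integer equivalent, so the fold
  // always yields a ConstantInt of that effective type.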
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) { - const Type *IntTy = getEffectiveSCEVType(Ty); - Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy); - if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty); - return getConstant(cast<ConstantInt>(C)); - } + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) @@ -997,12 +996,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) { - const Type *IntTy = getEffectiveSCEVType(Ty); - Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy); - if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty); - return getConstant(cast<ConstantInt>(C)); - } + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) @@ -1208,8 +1205,19 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, ScalarEvolution &SE) { bool Interesting = false; - // Iterate over the add operands. - for (unsigned i = 0, e = NumOperands; i != e; ++i) { + // Iterate over the add operands. They are sorted, with constants first. + unsigned i = 0; + while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + ++i; + // Pull a buried constant out to the outside. + if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } + + // Next comes everything else. We're especially interested in multiplies + // here, but they're in the middle, so just visit the rest with one loop. + for (; i != NumOperands; ++i) { const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { APInt NewScale = @@ -1237,11 +1245,6 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, Interesting = true; } } - } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { - // Pull a buried constant out to the outside. - if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) - Interesting = true; - AccumulatedConstant += Scale * C->getValue()->getValue(); } else { // An ordinary operand. Update the map. std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = @@ -1275,9 +1278,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) - assert(getEffectiveSCEVType(Ops[i]->getType()) == - getEffectiveSCEVType(Ops[0]->getType()) && + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!"); #endif @@ -1400,8 +1403,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) { // If we have an add, expand the add operands onto the end of the operands // list. 
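  // This loop flattens nested adds one level per iteration, e.g. (sketch):
  //
  //   (a + (b + c) + d)  -->  (a + d + b + c)
  //
  // The textual operand order changes, but getAddExpr re-sorts operands into
  // canonical order afterwards. Erasing Ops[Idx] before appending lets the
  // loop condition re-examine whatever element slides into Ops[Idx].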
- Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(Add->op_begin(), Add->op_end()); DeletedAdd = true; } @@ -1549,9 +1552,11 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); - // It's tempting to propagate NUW/NSW flags here, but nuw/nsw addition - // is not associative so this isn't necessarily safe. - const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop); + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer add and the inner addrec are guaranteed to have no overflow. + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, + HasNUW && AddRec->hasNoUnsignedWrap(), + HasNSW && AddRec->hasNoSignedWrap()); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1578,7 +1583,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, AddRec->op_end()); for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { if (i >= NewOps.size()) { - NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i, + NewOps.append(OtherAddRec->op_begin()+i, OtherAddRec->op_end()); break; } @@ -1711,8 +1716,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { // If we have an mul, expand the mul operands onto the end of the operands // list. - Ops.insert(Ops.end(), Mul->op_begin(), Mul->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(Mul->op_begin(), Mul->op_end()); DeletedMul = true; } @@ -1747,23 +1752,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(AddRec->getNumOperands()); - if (LIOps.size() == 1) { - const SCEV *Scale = LIOps[0]; - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); - } else { - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - SmallVector<const SCEV *, 4> MulOps(LIOps.begin(), LIOps.end()); - MulOps.push_back(AddRec->getOperand(i)); - NewOps.push_back(getMulExpr(MulOps)); - } - } + const SCEV *Scale = getMulExpr(LIOps); + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); - // It's tempting to propagate the NSW flag here, but nsw multiplication - // is not associative so this isn't necessarily safe. + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer mul and the inner addrec are guaranteed to have no overflow. const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(), HasNUW && AddRec->hasNoUnsignedWrap(), - /*HasNSW=*/false); + HasNSW && AddRec->hasNoSignedWrap()); // If all of the other operands were loop invariant, we are done. 
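  // Example of the flag propagation now permitted (sketch): when both the
  // outer multiply and the inner addrec carry no-unsigned-wrap,
  //
  //   2<nuw> * {0,+,4}<nuw>  -->  {0,+,8}<nuw>
  //
  // If either side lacks the flag, none is transferred, since nuw/nsw
  // arithmetic is not associative in general.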
if (Ops.size() == 1) return NewRec; @@ -1942,8 +1939,7 @@ const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) if (StepChrec->getLoop() == L) { - Operands.insert(Operands.end(), StepChrec->op_begin(), - StepChrec->op_end()); + Operands.append(StepChrec->op_begin(), StepChrec->op_end()); return getAddRecExpr(Operands, L); } @@ -2106,8 +2102,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { if (Idx < Ops.size()) { bool DeletedSMax = false; while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) { - Ops.insert(Ops.end(), SMax->op_begin(), SMax->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(SMax->op_begin(), SMax->op_end()); DeletedSMax = true; } @@ -2211,8 +2207,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { if (Idx < Ops.size()) { bool DeletedUMax = false; while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) { - Ops.insert(Ops.end(), UMax->op_begin(), UMax->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(UMax->op_begin(), UMax->op_end()); DeletedUMax = true; } @@ -2278,7 +2274,8 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2286,7 +2283,8 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2302,7 +2300,8 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2311,7 +2310,8 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2398,13 +2398,6 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { return S; } -/// getIntegerSCEV - Given a SCEVable type, create a constant for the -/// specified signed integer value and return a SCEV for the constant. 
-const SCEV *ScalarEvolution::getIntegerSCEV(int64_t Val, const Type *Ty) { - const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); - return getConstant(ConstantInt::get(ITy, Val)); -} - /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V /// const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { @@ -2772,7 +2765,11 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { /// const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { - bool InBounds = GEP->isInBounds(); + // Don't blindly transfer the inbounds flag from the GEP instruction to the + // Add expression, because the Instruction may be guarded by control flow + // and the no-overflow bits may not be valid for the expression in any + // context. + const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. @@ -2788,23 +2785,30 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - TotalOffset = getAddExpr(TotalOffset, - getOffsetOfExpr(STy, FieldNo), - /*HasNUW=*/false, /*HasNSW=*/InBounds); + const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + + // Add the field offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. - const SCEV *LocalOffset = getSCEV(Index); + const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. - LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); - // Lower "inbounds" GEPs to NSW arithmetic. - LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI), - /*HasNUW=*/false, /*HasNSW=*/InBounds); - TotalOffset = getAddExpr(TotalOffset, LocalOffset, - /*HasNUW=*/false, /*HasNSW=*/InBounds); + IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); + + // Multiply the index by the element size to compute the element offset. + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize); + + // Add the element offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, LocalOffset); } } - return getAddExpr(getSCEV(Base), TotalOffset, - /*HasNUW=*/false, /*HasNSW=*/InBounds); + + // Get the SCEV for the GEP base. + const SCEV *BaseS = getSCEV(Base); + + // Add the total offset from all the GEP indices to the base. + return getAddExpr(BaseS, TotalOffset); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -2963,7 +2967,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) if (!C->getValue()->isZero()) ConservativeResult = - ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)); + ConservativeResult.intersectWith( + ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); // TODO: non-affine addrec if (AddRec->isAffine()) { @@ -3196,15 +3201,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { Operator *U = cast<Operator>(V); switch (Opcode) { case Instruction::Add: - // Don't transfer the NSW and NUW bits from the Add instruction to the - // Add expression, because the Instruction may be guarded by control - // flow and the no-overflow bits may not be valid for the expression in - // any context. 
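+      // As with createNodeForGEP above, IR-level no-wrap flags only hold
+      // when the instruction actually executes. Sketch of the hazard:
+      //
+      //   if (%i < %n)                                   ; guard
+      //     %p = getelementptr inbounds ..., i64 %i
+      //
+      // SCEV may reuse the expression it builds for %p in contexts where
+      // the guard does not hold, so the flags must not be copied onto it.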
return getAddExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::Mul: - // Don't transfer the NSW and NUW bits from the Mul instruction to the - // Mul expression, as with Add. return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::UDiv: @@ -3658,6 +3657,26 @@ void ScalarEvolution::forgetValue(Value *V) { ConstantEvolutionLoopExitValue.erase(PN); } + // If there's a SCEVUnknown tying this value into the SCEV + // space, remove it from the folding set map. The SCEVUnknown + // object and any other SCEV objects which reference it + // (transitively) remain allocated, effectively leaked until + // the underlying BumpPtrAllocator is freed. + // + // This permits SCEV pointers to be used as keys in maps + // such as the ValuesAtScopes map. + FoldingSetNodeID ID; + ID.AddInteger(scUnknown); + ID.AddPointer(I); + void *IP; + if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { + UniqueSCEVs.RemoveNode(S); + + // This isn't necessary, but we might as well remove the + // value from the ValuesAtScopes map too. + ValuesAtScopes.erase(S); + } + PushDefUseChildren(I, Worklist); } } @@ -4139,8 +4158,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { // constant or derived from a PHI node themselves. PHINode *PHI = 0; for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op) - if (!(isa<Constant>(I->getOperand(Op)) || - isa<GlobalValue>(I->getOperand(Op)))) { + if (!isa<Constant>(I->getOperand(Op))) { PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L); if (P == 0) return 0; // Not evolving from PHI if (PHI == 0) @@ -4161,11 +4179,9 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal, const TargetData *TD) { if (isa<PHINode>(V)) return PHIVal; if (Constant *C = dyn_cast<Constant>(V)) return C; - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV; Instruction *I = cast<Instruction>(V); - std::vector<Constant*> Operands; - Operands.resize(I->getNumOperands()); + std::vector<Constant*> Operands(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD); @@ -4207,8 +4223,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, return RetVal = 0; // Must be a constant. Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - PHINode *PN2 = getConstantEvolvingPHI(BEValue, L); - if (PN2 != PN) + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa<Constant>(BEValue)) return RetVal = 0; // Not derived from same PHI. // Execute the loop symbolically to determine the exit value. @@ -4243,8 +4259,11 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, PHINode *PN = getConstantEvolvingPHI(Cond, L); if (PN == 0) return getCouldNotCompute(); - // Since the loop is canonicalized, the PHI node must have two entries. One - // entry must be a constant (coming in from outside of the loop), and the + // If the loop is canonicalized, the PHI will have exactly two entries. + // That's the only form we support here. + if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + + // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); Constant *StartCST = @@ -4252,8 +4271,9 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. 
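    // The shape being matched, as IR (sketch):
    //
    //   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %backedge ]
    //
    // StartCST is the incoming constant from outside the loop; BEValue,
    // fetched below, is the back-edge value (%iv.next here), which must be a
    // constant or compute from this same PHI.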
Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - PHINode *PN2 = getConstantEvolvingPHI(BEValue, L); - if (PN2 != PN) return getCouldNotCompute(); // Not derived from same PHI. + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa<Constant>(BEValue)) + return getCouldNotCompute(); // Not derived from same PHI. // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of @@ -4341,54 +4361,51 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // the arguments into constants, and if so, try to constant propagate the // result. This is particularly useful for computing loop exit values. if (CanConstantFold(I)) { - std::vector<Constant*> Operands; - Operands.reserve(I->getNumOperands()); + SmallVector<Constant *, 4> Operands; + bool MadeImprovement = false; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *Op = I->getOperand(i); if (Constant *C = dyn_cast<Constant>(Op)) { Operands.push_back(C); - } else { - // If any of the operands is non-constant and if they are - // non-integer and non-pointer, don't even try to analyze them - // with scev techniques. - if (!isSCEVable(Op->getType())) - return V; - - const SCEV *OpV = getSCEVAtScope(Op, L); - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) { - Constant *C = SC->getValue(); - if (C->getType() != Op->getType()) - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - Op->getType(), - false), - C, Op->getType()); - Operands.push_back(C); - } else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV)) { - if (Constant *C = dyn_cast<Constant>(SU->getValue())) { - if (C->getType() != Op->getType()) - C = - ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - Op->getType(), - false), - C, Op->getType()); - Operands.push_back(C); - } else - return V; - } else { - return V; - } + continue; } + + // If any of the operands is non-constant and if they are + // non-integer and non-pointer, don't even try to analyze them + // with scev techniques. + if (!isSCEVable(Op->getType())) + return V; + + const SCEV *OrigV = getSCEV(Op); + const SCEV *OpV = getSCEVAtScope(OrigV, L); + MadeImprovement |= OrigV != OpV; + + Constant *C = 0; + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) + C = SC->getValue(); + if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV)) + C = dyn_cast<Constant>(SU->getValue()); + if (!C) return V; + if (C->getType() != Op->getType()) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + Op->getType(), + false), + C, Op->getType()); + Operands.push_back(C); } - Constant *C = 0; - if (const CmpInst *CI = dyn_cast<CmpInst>(I)) - C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD); - else - C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size(), TD); - if (C) + // Check to see if getSCEVAtScope actually made an improvement. 
+ if (MadeImprovement) { + Constant *C = 0; + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + C = ConstantFoldCompareInstOperands(CI->getPredicate(), + Operands[0], Operands[1], TD); + else + C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), + &Operands[0], Operands.size(), TD); + if (!C) return V; return getSCEV(C); + } } } @@ -4438,7 +4455,29 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // If this is a loop recurrence for a loop that does not contain L, then we // are dealing with the final value computed by the loop. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) { - if (!L || !AddRec->getLoop()->contains(L)) { + // First, attempt to evaluate each operand. + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L); + if (OpAtScope == AddRec->getOperand(i)) + continue; + + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(), + AddRec->op_begin()+i); + NewOps.push_back(OpAtScope); + for (++i; i != e; ++i) + NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); + + AddRec = cast<SCEVAddRecExpr>(getAddRecExpr(NewOps, AddRec->getLoop())); + break; + } + + // If the scope is outside the addrec's loop, evaluate it by using the + // loop exit value of the addrec. + if (!AddRec->getLoop()->contains(L)) { // To evaluate this recurrence, we need to know how many times the AddRec // loop iterates. Compute this now. const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); @@ -4447,6 +4486,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Then, evaluate the AddRec. return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); } + return AddRec; } @@ -4696,23 +4736,6 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { return getCouldNotCompute(); } -/// getLoopPredecessor - If the given loop's header has exactly one unique -/// predecessor outside the loop, return it. Otherwise return null. -/// This is less strict that the loop "preheader" concept, which requires -/// the predecessor to have only one single successor. -/// -BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) { - BasicBlock *Header = L->getHeader(); - BasicBlock *Pred = 0; - for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); - PI != E; ++PI) - if (!L->contains(*PI)) { - if (Pred && Pred != *PI) return 0; // Multiple predecessors. - Pred = *PI; - } - return Pred; -} - /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB /// (which may not be an immediate predecessor) which has exactly one /// successor from which BB is reachable, or null if no such block is @@ -4730,7 +4753,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. 
if (Loop *L = LI->getLoopFor(BB)) - return std::make_pair(getLoopPredecessor(L), L->getHeader()); + return std::make_pair(L->getLoopPredecessor(), L->getHeader()); return std::pair<BasicBlock *, BasicBlock *>(); } @@ -5181,7 +5204,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, // as there are predecessors that can be found that have unique successors // leading to the original header. for (std::pair<BasicBlock *, BasicBlock *> - Pair(getLoopPredecessor(L), L->getHeader()); + Pair(L->getLoopPredecessor(), L->getHeader()); Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 17b254f..58711b8 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -12,7 +12,7 @@ // // This differs from traditional loop dependence analysis in that it tests // for dependencies within a single iteration of a loop, rather than -// dependences between different iterations. +// dependencies between different iterations. // // ScalarEvolution has a more complete understanding of pointer arithmetic // than BasicAliasAnalysis' collection of ad-hoc analyses. @@ -106,6 +106,12 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { AliasAnalysis::AliasResult ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize, const Value *B, unsigned BSize) { + // If either of the memory references is empty, it doesn't matter what the + // pointer values are. This allows the code below to ignore this special + // case. + if (ASize == 0 || BSize == 0) + return NoAlias; + // This is ScalarEvolutionAliasAnalysis. Get the SCEVs! const SCEV *AS = SE->getSCEV(const_cast<Value *>(A)); const SCEV *BS = SE->getSCEV(const_cast<Value *>(B)); @@ -118,14 +124,32 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize, if (SE->getEffectiveSCEVType(AS->getType()) == SE->getEffectiveSCEVType(BS->getType())) { unsigned BitWidth = SE->getTypeSizeInBits(AS->getType()); - APInt AI(BitWidth, ASize); + APInt ASizeInt(BitWidth, ASize); + APInt BSizeInt(BitWidth, BSize); + + // Compute the difference between the two pointers. const SCEV *BA = SE->getMinusSCEV(BS, AS); - if (AI.ule(SE->getUnsignedRange(BA).getUnsignedMin())) { - APInt BI(BitWidth, BSize); - const SCEV *AB = SE->getMinusSCEV(AS, BS); - if (BI.ule(SE->getUnsignedRange(AB).getUnsignedMin())) - return NoAlias; - } + + // Test whether the difference is known to be great enough that memory of + // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt + // are non-zero, which is special-cased above. + if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) && + (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax())) + return NoAlias; + + // Folding the subtraction while preserving range information can be tricky + // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS + // and try again to see if things fold better that way. + + // Compute the difference between the two pointers. + const SCEV *AB = SE->getMinusSCEV(AS, BS); + + // Test whether the difference is known to be great enough that memory of + // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt + // are non-zero, which is special-cased above. 
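+  // The test, spelled out (sketch): let D = B - A as an unsigned
+  // BitWidth-bit range, with accesses [A, A+ASize) and [B, B+BSize).
+  // ASize <= umin(D) means A's access ends before B begins, and
+  // -BSize >= umax(D), i.e. D <= 2^BitWidth - BSize, means B's access
+  // cannot wrap around the address space back into A's; together they
+  // establish NoAlias. The same reasoning applies to this swapped retry.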
+ if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) && + (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax())) + return NoAlias; } // If ScalarEvolution can find an underlying object, form a new query. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 0012b84..d4a4b26 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -21,6 +21,43 @@ #include "llvm/ADT/STLExtras.h" using namespace llvm; +/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, +/// reusing an existing cast if a suitable one exists, moving an existing +/// cast if a suitable one exists but isn't in the right place, or +/// creating a new one. +Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP) { + // Check to see if there is already a cast! + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + User *U = *UI; + if (U->getType() == Ty) + if (CastInst *CI = dyn_cast<CastInst>(U)) + if (CI->getOpcode() == Op) { + // If the cast isn't where we want it, fix it. + if (BasicBlock::iterator(CI) != IP) { + // Create a new cast, and leave the old cast in place in case + // it is being used as an insert point. Clear its operand + // so that it doesn't hold anything live. + Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); + NewCI->takeName(CI); + CI->replaceAllUsesWith(NewCI); + CI->setOperand(0, UndefValue::get(V->getType())); + rememberInstruction(NewCI); + return NewCI; + } + rememberInstruction(CI); + return CI; + } + } + + // Create a new cast. + Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); + rememberInstruction(I); + return I; +} + /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. @@ -54,71 +91,29 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { return CE->getOperand(0); } + // Fold a cast of a constant. if (Constant *C = dyn_cast<Constant>(V)) return ConstantExpr::getCast(Op, C, Ty); + // Cast the argument at the beginning of the entry block, after + // any bitcasts of other arguments. if (Argument *A = dyn_cast<Argument>(V)) { - // Check to see if there is already a cast! - for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) - if ((*UI)->getType() == Ty) - if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI))) - if (CI->getOpcode() == Op) { - // If the cast isn't the first instruction of the function, move it. - if (BasicBlock::iterator(CI) != - A->getParent()->getEntryBlock().begin()) { - // Recreate the cast at the beginning of the entry block. - // The old cast is left in place in case it is being used - // as an insert point. - Instruction *NewCI = - CastInst::Create(Op, V, Ty, "", - A->getParent()->getEntryBlock().begin()); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); - return NewCI; - } - return CI; - } - - Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), - A->getParent()->getEntryBlock().begin()); - rememberInstruction(I); - return I; + BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); + while ((isa<BitCastInst>(IP) && + isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && + cast<BitCastInst>(IP)->getOperand(0) != A) || + isa<DbgInfoIntrinsic>(IP)) + ++IP; + return ReuseOrCreateCast(A, Ty, Op, IP); } + // Cast the instruction immediately after the instruction. 
Instruction *I = cast<Instruction>(V); - - // Check to see if there is already a cast. If there is, use it. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - if ((*UI)->getType() == Ty) - if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI))) - if (CI->getOpcode() == Op) { - BasicBlock::iterator It = I; ++It; - if (isa<InvokeInst>(I)) - It = cast<InvokeInst>(I)->getNormalDest()->begin(); - while (isa<PHINode>(It)) ++It; - if (It != BasicBlock::iterator(CI)) { - // Recreate the cast after the user. - // The old cast is left in place in case it is being used - // as an insert point. - Instruction *NewCI = CastInst::Create(Op, V, Ty, "", It); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); - rememberInstruction(NewCI); - return NewCI; - } - rememberInstruction(CI); - return CI; - } - } BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP)) ++IP; - Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP); - rememberInstruction(CI); - return CI; + while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP; + return ReuseOrCreateCast(I, Ty, Op, IP); } /// InsertBinop - Insert the specified binary operator, doing a small amount @@ -295,11 +290,11 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, // the sum into a single value, so just use that. Ops.clear(); if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum)) - Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + Ops.append(Add->op_begin(), Add->op_end()); else if (!Sum->isZero()) Ops.push_back(Sum); // Then append the addrecs. - Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); + Ops.append(AddRecs.begin(), AddRecs.end()); } /// SplitAddRecs - Flatten a list of add operands, moving addrec start values @@ -322,7 +317,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, A->getLoop())); if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { Ops[i] = Zero; - Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + Ops.append(Add->op_begin(), Add->op_end()); e += Add->getNumOperands(); } else { Ops[i] = Start; @@ -330,7 +325,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, } if (!AddRecs.empty()) { // Add the addrecs onto the end of the list. - Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); + Ops.append(AddRecs.begin(), AddRecs.end()); // Resort the operand list, moving any constants to the front. SimplifyAddOperands(Ops, Ty, SE); } @@ -1070,7 +1065,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = llvm::next(BasicBlock::iterator(cast<Instruction>(V))); - while (isa<PHINode>(NewInsertPt)) ++NewInsertPt; + while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt)) + ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); restoreInsertPoint(SaveInsertBB, SaveInsertPt); @@ -1107,8 +1103,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { } // {0,+,1} --> Insert a canonical induction variable into the loop! - if (S->isAffine() && - S->getOperand(1) == SE.getConstant(Ty, 1)) { + if (S->isAffine() && S->getOperand(1)->isOne()) { // If there's a canonical IV, just use it. 
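  // Otherwise one is materialized below; the emitted pattern is, as IR
  // (sketch; the add really is named indvar.next in the code, the phi name
  // is illustrative):
  //
  //   header:
  //     %indvar = phi [ 0, %preheader ], [ %indvar.next, %latch ]
  //     ...
  //     %indvar.next = add %indvar, 1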
if (CanonicalIV) { assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && @@ -1125,17 +1120,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); - HPI != HPE; ++HPI) - if (L->contains(*HPI)) { + HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (L->contains(HP)) { // Insert a unit add instruction right before the terminator // corresponding to the back-edge. Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", - (*HPI)->getTerminator()); + HP->getTerminator()); rememberInstruction(Add); - PN->addIncoming(Add, *HPI); + PN->addIncoming(Add, HP); } else { - PN->addIncoming(Constant::getNullValue(Ty), *HPI); + PN->addIncoming(Constant::getNullValue(Ty), HP); } + } } // {0,+,F} --> {0,+,1} * F @@ -1312,7 +1309,9 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - if (PostIncLoops.empty()) + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(I); + else InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 75c381d..563fd2f 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -105,22 +105,25 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, case NormalizeAutodetect: if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace)) if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) { - Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); Loops.insert(L); } break; case Normalize: - if (Loops.count(L)) - Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); - break; - case Denormalize: if (Loops.count(L)) { const SCEV *TransformedStep = TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), User, OperandValToReplace, Loops, SE, DT); - Result = SE.getAddExpr(Result, TransformedStep); + Result = SE.getMinusSCEV(Result, TransformedStep); } break; + case Denormalize: + if (Loops.count(L)) + Result = SE.getAddExpr(Result, AR->getStepRecurrence(SE)); + break; } return Result; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 7e8ec2e..b4c9884 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -953,7 +953,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) // sqrt(-0.0) = -0.0, no other negative results are possible. 
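  // The pattern below recurs throughout this patch: CI->getOperand(i + 1)
  // becomes CI->getArgOperand(i). At this point in LLVM's history a call's
  // callee occupies operand 0 and its arguments operands 1..N, so for the
  // sqrt intrinsic
  //
  //   II->getOperand(1) == II->getArgOperand(0)   // the one argument
  //
  // and the argument-based accessor stays correct if the operand layout
  // ever changes.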
if (II->getIntrinsicID() == Intrinsic::sqrt) - return CannotBeNegativeZero(II->getOperand(1), Depth+1); + return CannotBeNegativeZero(II->getArgOperand(0), Depth+1); if (const CallInst *CI = dyn_cast<CallInst>(I)) if (const Function *F = CI->getCalledFunction()) { @@ -966,7 +966,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (F->getName() == "fabsl") return true; if (F->getName() == "sqrt" || F->getName() == "sqrtf" || F->getName() == "sqrtl") - return CannotBeNegativeZero(CI->getOperand(1), Depth+1); + return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1); } } diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 21d4f65..7eeeb59 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -366,8 +366,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // Check for errors opening or creating archive file. if (!ArchiveFile.is_open() || ArchiveFile.bad()) { - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); if (ErrMsg) *ErrMsg = "Error opening archive file: " + archPath.str(); return true; @@ -387,8 +386,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { if (writeMember(*I, ArchiveFile, CreateSymbolTable, TruncateNames, Compress, ErrMsg)) { - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); ArchiveFile.close(); return true; } @@ -420,8 +418,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); if (!FinalFile.is_open() || FinalFile.bad()) { - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); if (ErrMsg) *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); return true; @@ -438,8 +435,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, if (foreignST) { if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { FinalFile.close(); - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); return true; } } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 9b4370f..f4c0e50 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -492,6 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(linker_private); + KEYWORD(linker_private_weak); KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 226d8d3..6752181 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -196,19 +196,20 @@ bool LLParser::ParseTopLevelEntities() { // optional leading prefixes, the production is: // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal // OptionalAddrSpace ('constant'|'global') ... 
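  // The new keyword occupies the same slot as the other linkage types in
  // this production, e.g. (sketch):
  //
  //   @sym = linker_private_weak global i32 0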
- case lltok::kw_private : // OptionalLinkage - case lltok::kw_linker_private: // OptionalLinkage - case lltok::kw_internal: // OptionalLinkage - case lltok::kw_weak: // OptionalLinkage - case lltok::kw_weak_odr: // OptionalLinkage - case lltok::kw_linkonce: // OptionalLinkage - case lltok::kw_linkonce_odr: // OptionalLinkage - case lltok::kw_appending: // OptionalLinkage - case lltok::kw_dllexport: // OptionalLinkage - case lltok::kw_common: // OptionalLinkage - case lltok::kw_dllimport: // OptionalLinkage - case lltok::kw_extern_weak: // OptionalLinkage - case lltok::kw_external: { // OptionalLinkage + case lltok::kw_private: // OptionalLinkage + case lltok::kw_linker_private: // OptionalLinkage + case lltok::kw_linker_private_weak: // OptionalLinkage + case lltok::kw_internal: // OptionalLinkage + case lltok::kw_weak: // OptionalLinkage + case lltok::kw_weak_odr: // OptionalLinkage + case lltok::kw_linkonce: // OptionalLinkage + case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_appending: // OptionalLinkage + case lltok::kw_dllexport: // OptionalLinkage + case lltok::kw_common: // OptionalLinkage + case lltok::kw_dllimport: // OptionalLinkage + case lltok::kw_extern_weak: // OptionalLinkage + case lltok::kw_external: { // OptionalLinkage unsigned Linkage, Visibility; if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || @@ -629,7 +630,8 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Linkage != GlobalValue::WeakODRLinkage && Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::PrivateLinkage && - Linkage != GlobalValue::LinkerPrivateLinkage) + Linkage != GlobalValue::LinkerPrivateLinkage && + Linkage != GlobalValue::LinkerPrivateWeakLinkage) return Error(LinkageLoc, "invalid linkage type for alias"); Constant *Aliasee; @@ -1013,11 +1015,13 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { /// ::= /*empty*/ /// ::= 'private' /// ::= 'linker_private' +/// ::= 'linker_private_weak' /// ::= 'internal' /// ::= 'weak' /// ::= 'weak_odr' /// ::= 'linkonce' /// ::= 'linkonce_odr' +/// ::= 'available_externally' /// ::= 'appending' /// ::= 'dllexport' /// ::= 'common' @@ -1030,6 +1034,9 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { default: Res=GlobalValue::ExternalLinkage; return false; case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break; case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break; + case lltok::kw_linker_private_weak: + Res = GlobalValue::LinkerPrivateWeakLinkage; + break; case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; @@ -2704,6 +2711,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { break; case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: case GlobalValue::InternalLinkage: case GlobalValue::AvailableExternallyLinkage: case GlobalValue::LinkOnceAnyLinkage: @@ -3791,8 +3799,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, } } - if (Size && !Size->getType()->isIntegerTy(32)) - return Error(SizeLoc, "element count must be i32"); + if (Size && !Size->getType()->isIntegerTy()) + return Error(SizeLoc, "element count must have integer type"); if (isAlloca) { Inst = new AllocaInst(Ty, Size, Alignment); @@ -3801,6 +3809,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, 
PerFunctionState &PFS, // Autoupgrade old malloc instruction to malloc call. // FIXME: Remove in LLVM 3.0. + if (Size && !Size->getType()->isIntegerTy(32)) + return Error(SizeLoc, "element count must be i32"); const Type *IntPtrTy = Type::getInt32Ty(Context); Constant *AllocSize = ConstantExpr::getSizeOf(Ty); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 5eed170..2703134 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -37,9 +37,9 @@ namespace lltok { kw_declare, kw_define, kw_global, kw_constant, - kw_private, kw_linker_private, kw_internal, kw_linkonce, kw_linkonce_odr, - kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, - kw_available_externally, + kw_private, kw_linker_private, kw_linker_private_weak, kw_internal, + kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, + kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_extern_weak, kw_external, kw_thread_local, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 69adead..527ae49 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -75,6 +75,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { case 11: return GlobalValue::LinkOnceODRLinkage; case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::LinkerPrivateLinkage; + case 14: return GlobalValue::LinkerPrivateWeakLinkage; } } @@ -252,17 +253,18 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { // at once. while (!Placeholder->use_empty()) { Value::use_iterator UI = Placeholder->use_begin(); + User *U = *UI; // If the using object isn't uniqued, just update the operands. This // handles instructions and initializers for global variables. - if (!isa<Constant>(*UI) || isa<GlobalValue>(*UI)) { + if (!isa<Constant>(U) || isa<GlobalValue>(U)) { UI.getUse().set(RealVal); continue; } // Otherwise, we have a constant that uses the placeholder. Replace that // constant with a new constant that has *all* placeholder uses updated. - Constant *UserC = cast<Constant>(*UI); + Constant *UserC = cast<Constant>(U); for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end(); I != E; ++I) { Value *NewOp; @@ -2178,13 +2180,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, op, align] - if (Record.size() < 3) + case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] + // For backward compatibility, tolerate a lack of an opty, and use i32. + // LLVM 3.0: Remove this. + if (Record.size() < 3 || Record.size() > 4) return Error("Invalid ALLOCA record"); + unsigned OpNum = 0; const PointerType *Ty = - dyn_cast_or_null<PointerType>(getTypeByID(Record[0])); - Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context)); - unsigned Align = Record[2]; + dyn_cast_or_null<PointerType>(getTypeByID(Record[OpNum++])); + const Type *OpTy = Record.size() == 4 ? 
getTypeByID(Record[OpNum++]) : + Type::getInt32Ty(Context); + Value *Size = getFnValueByID(Record[OpNum++], OpTy); + unsigned Align = Record[OpNum++]; if (!Ty || !Size) return Error("Invalid ALLOCA record"); I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); InstructionList.push_back(I); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 9bda6dc..fa1b2c4 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -313,6 +313,7 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { case GlobalValue::LinkOnceODRLinkage: return 11; case GlobalValue::AvailableExternallyLinkage: return 12; case GlobalValue::LinkerPrivateLinkage: return 13; + case GlobalValue::LinkerPrivateWeakLinkage: return 14; } } @@ -577,10 +578,9 @@ static void WriteFunctionLocalMetadata(const Function &F, BitstreamWriter &Stream) { bool StartedMetadataBlock = false; SmallVector<uint64_t, 64> Record; - const ValueEnumerator::ValueList &Vals = VE.getMDValues(); - + const SmallVector<const MDNode *, 8> &Vals = VE.getFunctionLocalMDValues(); for (unsigned i = 0, e = Vals.size(); i != e; ++i) - if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) + if (const MDNode *N = Vals[i]) if (N->isFunctionLocal() && N->getFunction() == &F) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); @@ -588,7 +588,7 @@ static void WriteFunctionLocalMetadata(const Function &F, } WriteMDNode(N, VE, Stream, Record); } - + if (StartedMetadataBlock) Stream.ExitBlock(); } @@ -1114,6 +1114,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::Alloca: Code = bitc::FUNC_CODE_INST_ALLOCA; Vals.push_back(VE.getTypeID(I.getType())); + Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1); break; @@ -1134,26 +1135,25 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(cast<StoreInst>(I).isVolatile()); break; case Instruction::Call: { - const PointerType *PTy = cast<PointerType>(I.getOperand(0)->getType()); + const CallInst &CI = cast<CallInst>(I); + const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); Code = bitc::FUNC_CODE_INST_CALL; - const CallInst *CI = cast<CallInst>(&I); - Vals.push_back(VE.getAttributeID(CI->getAttributes())); - Vals.push_back((CI->getCallingConv() << 1) | unsigned(CI->isTailCall())); - PushValueAndType(CI->getOperand(0), InstID, Vals, VE); // Callee + Vals.push_back(VE.getAttributeID(CI.getAttributes())); + Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall())); + PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Vals.push_back(VE.getValueID(I.getOperand(i+1))); // fixed param. + Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param. // Emit type/value pairs for varargs params. 
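      // The rewrite below replaces the old operand arithmetic
      // (NumOperands - 1 - NumParams, which hard-coded where the callee
      // lives) with the half-open range [getNumParams(), getNumArgOperands()),
      // which names the varargs directly and survives operand reordering.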
if (FTy->isVarArg()) { - unsigned NumVarargs = I.getNumOperands()-1-FTy->getNumParams(); - for (unsigned i = I.getNumOperands()-NumVarargs, e = I.getNumOperands(); + for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands(); i != e; ++i) - PushValueAndType(I.getOperand(i), InstID, Vals, VE); // varargs + PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE); // varargs } break; } @@ -1662,15 +1662,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { WriteBitcodeToStream( M, Stream ); - // If writing to stdout, set binary mode. - if (&llvm::outs() == &Out) - sys::Program::ChangeStdoutToBinary(); - // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); - - // Make sure it hits disk now. - Out.flush(); } /// WriteBitcodeToStream - Write the specified module to the specified output diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index d2baec7..7fa425a 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -72,7 +72,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { // Enumerate types used by the type symbol table. EnumerateTypeSymbolTable(M->getTypeSymbolTable()); - // Insert constants and metadata that are named at module level into the slot + // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... EnumerateValueSymbolTable(M->getValueSymbolTable()); EnumerateMDSymbolTable(M->getMDSymbolTable()); @@ -257,6 +257,8 @@ void ValueEnumerator::EnumerateMetadata(const Value *MD) { else EnumerateType(Type::getVoidTy(MD->getContext())); } + if (N->isFunctionLocal() && N->getFunction()) + FunctionLocalMDs.push_back(N); return; } @@ -414,7 +416,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) { FirstInstID = Values.size(); - SmallVector<MDNode *, 8> FunctionLocalMDs; + FunctionLocalMDs.clear(); + SmallVector<MDNode *, 8> FnLocalMDVector; // Add all of the instructions. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { @@ -423,7 +426,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) { if (MDNode *MD = dyn_cast<MDNode>(*OI)) if (MD->isFunctionLocal() && MD->getFunction()) // Enumerate metadata after the instructions they might refer to. - FunctionLocalMDs.push_back(MD); + FnLocalMDVector.push_back(MD); } if (!I->getType()->isVoidTy()) EnumerateValue(I); @@ -431,8 +434,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) { } // Add all of the function-local metadata. 
- for (unsigned i = 0, e = FunctionLocalMDs.size(); i != e; ++i) - EnumerateOperandType(FunctionLocalMDs[i]); + for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) + EnumerateOperandType(FnLocalMDVector[i]); } void ValueEnumerator::purgeFunction() { diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 4f8ebf5..2b9b15f 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -15,6 +15,7 @@ #define VALUE_ENUMERATOR_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Attributes.h" #include <vector> @@ -26,7 +27,7 @@ class Instruction; class BasicBlock; class Function; class Module; -class MetadataBase; +class MDNode; class NamedMDNode; class AttrListPtr; class TypeSymbolTable; @@ -49,6 +50,7 @@ private: ValueMapType ValueMap; ValueList Values; ValueList MDValues; + SmallVector<const MDNode *, 8> FunctionLocalMDs; ValueMapType MDValueMap; typedef DenseMap<void*, unsigned> AttributeMapType; @@ -105,6 +107,9 @@ public: const ValueList &getValues() const { return Values; } const ValueList &getMDValues() const { return MDValues; } + const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const { + return FunctionLocalMDs; + } const TypeList &getTypes() const { return Types; } const std::vector<const BasicBlock*> &getBasicBlocks() const { return BasicBlocks; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 4008a6a..a7189ac 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -114,6 +115,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)), State(NULL) { @@ -163,25 +165,27 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } + unsigned Reg = *I; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. 
+      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+        unsigned AliasReg = *Alias;
+        State->UnionGroups(AliasReg, 0);
+        KillIndices[AliasReg] = BB->size();
+        DefIndices[AliasReg] = ~0u;
       }
-    }
+    }
 
   // Mark live-out callee-saved registers. In a return block this is
   // all callee-saved registers. In non-return this is any
@@ -390,7 +394,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
     // If MI's defs have a special allocation requirement, don't allow
     // any def registers to be changed. Also assume all registers
     // defined in a call must not be changed (ABI).
-    if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) {
+    if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+        TII->isPredicated(MI)) {
       DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
       State->UnionGroups(Reg, 0);
     }
@@ -443,6 +448,26 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
   std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
 
+  // If MI's uses have a special allocation requirement, don't allow
+  // any use registers to be changed. Also assume all registers
+  // used in a call must not be changed (ABI).
+  // FIXME: The issue with predicated instructions is more complex. We are
+  // being conservative here because the kill markers cannot be trusted after
+  // if-conversion:
+  // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+  // ...
+  // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+  // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+  // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+  //
+  // The first R6 kill is not really a kill since it's killed by a predicated
+  // instruction which may not be executed. The second R6 def may or may not
+  // re-define R6 so it's not safe to change it since the last R6 use cannot be
+  // changed.
+  bool Special = MI->getDesc().isCall() ||
+    MI->getDesc().hasExtraSrcRegAllocReq() ||
+    TII->isPredicated(MI);
+
   // Scan the register uses for this instruction and update
   // live-ranges, groups and RegRefs.
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -459,10 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
     // for the register.
     HandleLastUse(Reg, Count, "(last-use)");
 
-    // If MI's uses have a special allocation requirement, don't allow
-    // any use registers to be changed. Also assume all registers
-    // used in a call must not be changed (ABI).
-    if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) {
+    if (Special) {
       DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
       State->UnionGroups(Reg, 0);
     }
@@ -604,8 +626,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
   // order. If that register is available, and the corresponding
   // registers are available for the other group subregisters, then we
   // can use those registers to rename.
+
+  // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+  // check every use of the register and find the largest register class
+  // that can be used in all of them.
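+  // (A hypothetical sketch of that improvement would start from the minimal
+  // class and intersect it with the class required by each use, e.g.
+  //   RC = commonSubClass(RC, classRequiredByUse(U));
+  // both helper names here are illustrative, not implied by this patch.)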
const TargetRegisterClass *SuperRC = - TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other); + TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); @@ -905,6 +931,19 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second.Operand->setReg(NewReg); + // If the SU for the instruction being updated has debug + // information related to the anti-dependency register, make + // sure to update that as well. + const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } } // We just went back in time and modified history; the diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 506d43e..91ebb85 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -115,6 +115,7 @@ namespace llvm { class AggressiveAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5a0c27b..d9387a8 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -199,7 +199,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); @@ -225,6 +225,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { break; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: + case GlobalValue::LinkerPrivateLinkage: break; default: llvm_unreachable("Unknown linkage type!"); @@ -330,7 +331,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); - EmitLinkage(GV->getLinkage(), MangSym); EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(MangSym); @@ -353,7 +353,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer unsigned PtrSize = TD->getPointerSizeInBits()/8; - OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"), + OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); @@ -428,20 +428,12 @@ void AsmPrinter::EmitFunctionHeader() { // Emit pre-function debug and/or EH information. 
if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->BeginFunction(MF); - } else { - DE->BeginFunction(MF); - } + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->BeginFunction(MF); } if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->beginFunction(MF); - } else { - DD->beginFunction(MF); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginFunction(MF); } } @@ -458,14 +450,11 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -/// EmitComments - Pretty-print comments for instructions. -static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { - const MachineFunction *MF = MI.getParent()->getParent(); - const TargetMachine &TM = MF->getTarget(); - - DebugLoc DL = MI.getDebugLoc(); +static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(MF->getFunction()->getContext())); + DIScope Scope(DL.getScope(Ctx)); // Omit the directory, because it's likely to be long and uninteresting. if (Scope.Verify()) CommentOS << Scope.getFilename(); @@ -474,6 +463,23 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { CommentOS << ':' << DL.getLine(); if (DL.getCol() != 0) CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << "[ "; + EmitDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + +/// EmitComments - Pretty-print comments for instructions. +static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetMachine &TM = MF->getTarget(); + + DebugLoc DL = MI.getDebugLoc(); + if (!DL.isUnknown()) { // Print source line info. + EmitDebugLoc(DL, MF, CommentOS); CommentOS << '\n'; } @@ -611,12 +617,8 @@ void AsmPrinter::EmitFunctionBody() { } if (ShouldPrintDebugScopes) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->beginScope(II); - } else { - DD->beginScope(II); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginScope(II); } if (isVerbose()) @@ -649,12 +651,8 @@ void AsmPrinter::EmitFunctionBody() { } if (ShouldPrintDebugScopes) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endScope(II); - } else { - DD->endScope(II); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endScope(II); } } } @@ -692,20 +690,12 @@ void AsmPrinter::EmitFunctionBody() { // Emit post-function debug information. if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endFunction(MF); - } else { - DD->endFunction(MF); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endFunction(MF); } if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->EndFunction(); - } else { - DE->EndFunction(); - } + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->EndFunction(); } MMI->EndFunction(); @@ -730,19 +720,15 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. 
if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->EndModule(); - } else { + { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); DE->EndModule(); } delete DE; DE = 0; } if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endModule(); - } else { + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->endModule(); } delete DD; DD = 0; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index ba6fed2..f6f3bae 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -83,7 +83,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { // Tell SrcMgr about this buffer, it takes ownership of the buffer. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI); + AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI); OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" @@ -279,7 +279,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Okay, we finally have a value number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = 1; + unsigned OpNo = 2; bool Error = false; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index b2c70d5..21396ca 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -201,6 +201,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); @@ -221,6 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); break; } return 0; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 890507c..65c1d19 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -44,7 +44,8 @@ using namespace llvm; static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, cl::desc("Print DbgScope information for each machine instruction")); -static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, +static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", + cl::Hidden, cl::desc("Disable debug info printing")); static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, @@ -79,15 +80,13 @@ class CompileUnit { /// IndexTyDie - An anonymous type for index type. Owned by CUDie. 
  DIE *IndexTyDie;
 
-  /// GVToDieMap - Tracks the mapping of unit level debug information
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
   /// variables to debug information entries.
-  /// FIXME : Rename GVToDieMap -> NodeToDieMap
-  DenseMap<const MDNode *, DIE *> GVToDieMap;
+  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
 
-  /// GVToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
   /// descriptors to debug information entries using a DIEEntry proxy.
-  /// FIXME : Rename
-  DenseMap<const MDNode *, DIEEntry *> GVToDIEEntryMap;
+  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
 
   /// Globals - A map of globally visible named entities for this unit.
   ///
@@ -123,25 +122,25 @@ public:
 
   /// getDIE - Returns the debug information entry map slot for the
   /// specified debug variable.
-  DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); }
+  DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
 
   /// insertDIE - Insert DIE into the map.
   void insertDIE(const MDNode *N, DIE *D) {
-    GVToDieMap.insert(std::make_pair(N, D));
+    MDNodeToDieMap.insert(std::make_pair(N, D));
   }
 
   /// getDIEEntry - Returns the debug information entry for the specified
   /// debug variable.
   DIEEntry *getDIEEntry(const MDNode *N) {
-    DenseMap<const MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
-    if (I == GVToDIEEntryMap.end())
+    DenseMap<const MDNode *, DIEEntry *>::iterator I = MDNodeToDIEEntryMap.find(N);
+    if (I == MDNodeToDIEEntryMap.end())
       return NULL;
     return I->second;
   }
 
   /// insertDIEEntry - Insert debug information entry into the map.
   void insertDIEEntry(const MDNode *N, DIEEntry *E) {
-    GVToDIEEntryMap.insert(std::make_pair(N, E));
+    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
   }
 
   /// addDie - Adds or interns the DIE to the compile unit.
@@ -321,12 +320,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+  DwarfDebugLineSectionSym = CurrentLineSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
-  if (TimePassesIsEnabled) {
-    NamedRegionTimer T(DbgTimerName, DWARFGroupName);
-    beginModule(M);
-  } else {
-    beginModule(M);
+  DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+  {
+    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+    beginModule(M);
   }
 }
 DwarfDebug::~DwarfDebug() {
@@ -378,7 +377,8 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
 void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
                          unsigned Form, uint64_t Integer) {
   if (!Form) Form = DIEInteger::BestForm(false, Integer);
-  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+  DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); Die->addValue(Attribute, Form, Value); } @@ -866,6 +866,10 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { } else if (Context.isNameSpace()) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context), + /*MakeDecl=*/false); + ContextDIE->addChild(Die); } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); else @@ -1055,6 +1059,10 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } break; } default: @@ -1065,8 +1073,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { // Add size if non-zero (derived types might be zero-sized.) if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -1329,6 +1338,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { // DW_TAG_inlined_subroutine may refer to this DIE. SPCU->insertDIE(SP, SPDie); + // Add to context owner. + addToContextOwner(SPDie, SP.getContext()); + return SPDie; } @@ -1379,6 +1391,7 @@ static bool isSubprogramContext(const MDNode *Context) { DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { CompileUnit *SPCU = getCompileUnit(SPNode); DIE *SPDie = SPCU->getDIE(SPNode); + assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); @@ -1412,6 +1425,14 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { SPCU->addDie(SPDie); } + // Pick up abstract subprogram DIE. 
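+  // (When this subprogram was also inlined somewhere, constructScopeDIE
+  // records its abstract DIE in AbstractSPDies; the concrete out-of-line DIE
+  // built here then points back at it with DW_AT_abstract_origin instead of
+  // re-describing the subprogram's attributes.)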
+  if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+    SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+                dwarf::DW_FORM_ref4, AbsSPDIE);
+    SPCU->addDie(SPDie);
+  }
+
   addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
            Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
   addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
@@ -1483,7 +1504,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
   const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
 
-  if (StartLabel == FunctionBeginSym || EndLabel == 0) {
+  if (StartLabel == 0 || EndLabel == 0) {
     assert (0 && "Unexpected Start and End labels for an inlined scope!");
     return 0;
   }
@@ -1605,11 +1626,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
     // FIXME : Handle getNumOperands != 3
     if (DVInsn->getNumOperands() == 3) {
       if (DVInsn->getOperand(0).isReg())
-        updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+        updated =
+          addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
       else if (DVInsn->getOperand(0).isImm())
         updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
       else if (DVInsn->getOperand(0).isFPImm())
-        updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+        updated =
+          addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
     } else {
       MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
       if (Location.getReg()) {
@@ -1682,8 +1705,13 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   if (Scope->getInlinedAt())
     ScopeDIE = constructInlinedScopeDIE(Scope);
   else if (DS.isSubprogram()) {
-    if (Scope->isAbstractScope())
+    ProcessedSPNodes.insert(DS);
+    if (Scope->isAbstractScope()) {
       ScopeDIE = getCompileUnit(DS)->getDIE(DS);
+      // Note down abstract DIE.
+      if (ScopeDIE)
+        AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+    }
     else
       ScopeDIE = updateSubprogramScopeDIE(DS);
   }
@@ -1782,11 +1810,11 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
   addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
   // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
   // simplifies debug range entries.
-  addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0);
+  addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
   // DW_AT_stmt_list is an offset of line number information for this
-  // compile unit in debug_line section. It is always zero when only one
-  // compile unit is emitted in one object file.
-  addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+  // compile unit in debug_line section. This offset is calculated
+  // during endModule().
+  addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
 
   if (!Dir.empty())
     addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1996,6 +2024,40 @@ void DwarfDebug::beginModule(Module *M) {
 ///
 void DwarfDebug::endModule() {
   if (!FirstCU) return;
+  const Module *M = MMI->getModule();
+  if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
+    for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
+      if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
+      DISubprogram SP(AllSPs->getOperand(SI));
+      if (!SP.Verify()) continue;
+
+      // Collect info for variables that were optimized out.
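+      // (Such locals live in per-function named metadata of the form
+      // "llvm.dbg.lv.<linkage name>"; the lookup below falls back to the
+      // plain function name when no linkage name was recorded.)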
+ StringRef FName = SP.getLinkageName(); + if (FName.empty()) + FName = SP.getName(); + NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName))); + if (!NMD) continue; + unsigned E = NMD->getNumOperands(); + if (!E) continue; + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL); + for (unsigned I = 0; I != E; ++I) { + DIVariable DV(NMD->getOperand(I)); + if (!DV.Verify()) continue; + Scope->addVariable(new DbgVariable(DV)); + } + + // Construct subprogram DIE and add variables DIEs. + constructSubprogramDIE(SP); + DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP); + const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) { + DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); + if (VariableDIE) + ScopeDIE->addChild(VariableDIE); + } + } + } // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), @@ -2037,15 +2099,15 @@ void DwarfDebug::endModule() { // Compute DIE offsets and sizes. computeSizeAndOffsets(); + // Emit source line correspondence into a debug line section. + emitDebugLines(); + // Emit all the DIEs into a debug info section emitDebugInfo(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit source line correspondence into a debug line section. - emitDebugLines(); - // Emit info into a debug pubnames section. emitDebugPubNames(); @@ -2150,8 +2212,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { } /// collectVariableInfo - Populate DbgScope entries with variables' info. -void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { - SmallPtrSet<const MDNode *, 16> Processed; +void +DwarfDebug::collectVariableInfo(const MachineFunction *MF, + SmallPtrSet<const MDNode *, 16> &Processed) { /// collection info from MMI table. collectVariableInfoFromMMITable(MF, Processed); @@ -2180,16 +2243,23 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { if (Processed.count(DV) != 0) continue; + const MachineInstr *PrevMI = MInsn; for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1, ME = DbgValues.end(); MI != ME; ++MI) { const MDNode *Var = (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && isDbgValueInDefinedReg(*MI)) + if (Var == DV && isDbgValueInDefinedReg(*MI) && + !PrevMI->isIdenticalTo(*MI)) MultipleValues.push_back(*MI); + PrevMI = *MI; } DbgScope *Scope = findDbgScope(MInsn); - if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable) + bool CurFnArg = false; + if (DV.getTag() == dwarf::DW_TAG_arg_variable && + DISubprogram(DV.getContext()).describes(MF->getFunction())) + CurFnArg = true; + if (!Scope && CurFnArg) Scope = CurrentFnDbgScope; // If variable scope is not found then skip this variable. 
if (!Scope) @@ -2198,7 +2268,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { Processed.insert(DV); DbgVariable *RegVar = new DbgVariable(DV); Scope->addVariable(RegVar); - if (DV.getTag() != dwarf::DW_TAG_arg_variable) + if (!CurFnArg) DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; @@ -2217,7 +2287,8 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { const MachineInstr *Begin = NULL; const MachineInstr *End = NULL; for (SmallVector<const MachineInstr *, 4>::iterator - MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) { + MVI = MultipleValues.begin(), MVE = MultipleValues.end(); + MVI != MVE; ++MVI) { if (!Begin) { Begin = *MVI; continue; @@ -2241,8 +2312,11 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { } // Collect info for variables that were optimized out. + const Function *F = MF->getFunction(); + const Module *M = F->getParent(); if (NamedMDNode *NMD = - MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) { + M->getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(F->getName())))) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i))); if (!DV || !Processed.insert(DV)) @@ -2319,7 +2393,8 @@ void DwarfDebug::endScope(const MachineInstr *MI) { } /// getOrCreateDbgScope - Create DbgScope for the scope. -DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) { +DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, + const MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) @@ -2335,13 +2410,20 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl if (!WScope->getParent()) { StringRef SPName = DISubprogram(Scope).getLinkageName(); - if (SPName == Asm->MF->getFunction()->getName()) + // We used to check only for a linkage name, but that fails + // since we began omitting the linkage name for private + // functions. The new way is to check for the name in metadata, + // but that's not supported in old .ll test cases. Ergo, we + // check both. + if (SPName == Asm->MF->getFunction()->getName() || + DISubprogram(Scope).getFunction() == Asm->MF->getFunction()) CurrentFnDbgScope = WScope; } return WScope; } + getOrCreateAbstractScope(Scope); DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); if (WScope) return WScope; @@ -2355,7 +2437,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl Parent->addScope(WScope); ConcreteScopes[InlinedAt] = WScope; - getOrCreateAbstractScope(Scope); return WScope; } @@ -2365,8 +2446,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl static bool hasValidLocation(LLVMContext &Ctx, const MachineInstr *MInsn, const MDNode *&Scope, const MDNode *&InlinedAt) { - if (MInsn->isDebugValue()) - return false; DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) return false; @@ -2488,7 +2567,8 @@ bool DwarfDebug::extractScopeInformation() { // current instruction scope does not match scope of first instruction // in this range then create a new instruction range. 
DbgRange R(RangeBeginMI, PrevMI); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, + PrevInlinedAt); MIRanges.push_back(R); } @@ -2565,7 +2645,6 @@ void DwarfDebug::identifyScopeMarkers() { RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "DbgRange does not have first instruction!"); assert(RI->second && "DbgRange does not have second instruction!"); - InsnsBeginScopeSet.insert(RI->first); InsnsEndScopeSet.insert(RI->second); } } @@ -2616,6 +2695,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { recordSourceLine(Line, Col, Scope); + /// ProcessedArgs - Collection of arguments already processed. + SmallPtrSet<const MDNode *, 8> ProcessedArgs; + DebugLoc PrevLoc; for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) @@ -2624,14 +2706,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *MI = II; DebugLoc DL = MI->getDebugLoc(); if (MI->isDebugValue()) { - // DBG_VALUE needs a label if the variable is local variable or - // an argument whose location is changing. assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); if (!DV.Verify()) continue; - if (DV.getTag() != dwarf::DW_TAG_arg_variable) + // If DBG_VALUE is for a local variable then it needs a label. + if (DV.getTag() != dwarf::DW_TAG_arg_variable + && isDbgValueInUndefinedReg(MI) == false) InsnNeedsLabel.insert(MI); - else if (!ProcessedArgs.insert(DV)) + // DBG_VALUE for inlined functions argument needs a label. + else if (!DISubprogram(getDISubprogram(DV.getContext())). + describes(MF->getFunction())) + InsnNeedsLabel.insert(MI); + // DBG_VALUE indicating argument location change needs a label. + else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV)) InsnNeedsLabel.insert(MI); } else { // If location is unknown then instruction needs a location only if @@ -2664,7 +2751,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionEndSym); - collectVariableInfo(MF); + SmallPtrSet<const MDNode *, 16> ProcessedVars; + collectVariableInfo(MF, ProcessedVars); // Get function line info. if (!Lines.empty()) { @@ -2679,9 +2767,31 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Construct abstract scopes. for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), - AE = AbstractScopesList.end(); AI != AE; ++AI) - constructScopeDIE(*AI); - + AE = AbstractScopesList.end(); AI != AE; ++AI) { + DISubprogram SP((*AI)->getScopeNode()); + if (SP.Verify()) { + // Collect info for variables that were optimized out. 
+ StringRef FName = SP.getLinkageName(); + if (FName.empty()) + FName = SP.getName(); + const Module *M = MF->getFunction()->getParent(); + if (NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(FName)))) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i))); + if (!DV || !ProcessedVars.insert(DV)) + continue; + DbgScope *Scope = AbstractScopes.lookup(DV.getContext()); + if (Scope) + Scope->addVariable(new DbgVariable(DV)); + } + } + } + if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0) + constructScopeDIE(*AI); + } + DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) @@ -2696,13 +2806,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; InsnNeedsLabel.clear(); - ProcessedArgs.clear(); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); DbgVariableLabelsMap.clear(); DeleteContainerSeconds(DbgScopeMap); - InsnsBeginScopeSet.clear(); InsnsEndScopeSet.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); @@ -2764,7 +2872,8 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. -MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) { +MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, + const MDNode *S) { StringRef Dir; StringRef Fn; @@ -2790,6 +2899,16 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode Src = GetOrCreateSourceID(Dir, Fn); } +#if 0 + if (!Lines.empty()) { + SrcLineInfo lastSrcLineInfo = Lines.back(); + // Emitting sequential line records with the same line number (but + // different addresses) seems to confuse GDB. Avoid this. + if (lastSrcLineInfo.getLine() == Line) + return NULL; + } +#endif + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); @@ -2898,7 +3017,8 @@ void DwarfDebug::EmitSectionLabels() { if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) EmitSectionSym(Asm, MacroInfo); - EmitSectionSym(Asm, TLOF.getDwarfLineSection()); + DwarfDebugLineSectionSym = + EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); @@ -2961,6 +3081,11 @@ void DwarfDebug::emitDIE(DIE *Die) { 4); break; } + case dwarf::DW_AT_stmt_list: { + Asm->EmitLabelDifference(CurrentLineSectionSym, + DwarfDebugLineSectionSym, 4); + break; + } case dwarf::DW_AT_location: { if (UseDotDebugLocEntry.count(Die) != 0) { DIELabel *L = cast<DIELabel>(Values[i]); @@ -3106,6 +3231,8 @@ void DwarfDebug::emitDebugLines() { Asm->getObjFileLowering().getDwarfLineSection()); // Construct the section header. 
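+  // A label at the very start of the line section lets each compile unit's
+  // DW_AT_stmt_list be emitted as the difference between this label and
+  // section_line (see the DW_AT_stmt_list case in emitDIE above).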
+ CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin"); + Asm->OutStreamer.EmitLabel(CurrentLineSectionSym); Asm->OutStreamer.AddComment("Length of Source Line Info"); Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"), Asm->GetTempSymbol("line_begin"), 4); @@ -3491,8 +3618,9 @@ void DwarfDebug::emitDebugLoc() { unsigned char Size = Asm->getTargetData().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; - for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(), - E = DotDebugLocEntries.end(); I != E; ++I, ++index) { + for (SmallVector<DotDebugLocEntry, 4>::iterator + I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); + I != E; ++I, ++index) { DotDebugLocEntry Entry = *I; if (Entry.isEmpty()) { Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 0d6116f..5a281c8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -156,6 +156,9 @@ class DwarfDebug { /// not included DbgScopeMap. AbstractScopes owns its DbgScope*s. DenseMap<const MDNode *, DbgScope *> AbstractScopes; + /// AbstractSPDies - Collection of abstract subprogram DIEs. + DenseMap<const MDNode *, DIE *> AbstractSPDies; + /// AbstractScopesList - Tracks abstract scopes constructed while processing /// a function. This list is cleared during endFunction(). SmallVector<DbgScope *, 4>AbstractScopesList; @@ -210,7 +213,7 @@ class DwarfDebug { DenseMap<DIE *, const MDNode *> ContainingTypeMap; typedef SmallVector<DbgScope *, 2> ScopeVector; - SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet; + SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet; /// InlineInfo - Keep track of inlined functions and their location. This @@ -219,6 +222,10 @@ class DwarfDebug { DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; SmallVector<const MDNode *, 4> InlinedSPNodes; + // ProcessedSPNodes - This is a collection of subprogram MDNodes that + // are processed to create DIEs. + SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; + /// LabelsBeforeInsn - Maps instruction with label emitted before /// instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; @@ -231,9 +238,6 @@ class DwarfDebug { /// a debuggging information entity. SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel; - /// ProcessedArgs - Collection of arguments already processed. - SmallPtrSet<const MDNode *, 8> ProcessedArgs; - SmallVector<const MCSymbol *, 8> DebugRangeSymbols; /// Previous instruction's location information. This is used to determine @@ -257,7 +261,10 @@ class DwarfDebug { MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; MCSymbol *DwarfDebugLocSectionSym; + MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; + + DIEInteger *DIEIntegerOne; private: /// getSourceDirectoryAndFileIds - Return the directory and file ids that @@ -593,7 +600,8 @@ private: bool extractScopeInformation(); /// collectVariableInfo - Populate DbgScope entries with variables' info. - void collectVariableInfo(const MachineFunction *); + void collectVariableInfo(const MachineFunction *, + SmallPtrSet<const MDNode *, 16> &ProcessedVars); /// collectVariableInfoFromMMITable - Collect variable information from /// side table maintained by MMI. 
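The AsmPrinter and DwarfDebug hunks above repeatedly collapse the pattern
"if (TimePassesIsEnabled) { NamedRegionTimer T(...); work(); } else { work(); }"
into a single RAII timer that takes the enable flag as a constructor argument.
A minimal sketch of that idiom, using a hypothetical RegionTimer stand-in (the
real class is LLVM's NamedRegionTimer from Support/Timer.h):

    #include <chrono>
    #include <cstdio>
    #include <string>

    // Stand-in for NamedRegionTimer: times a region only when the enable
    // flag passed to the constructor is set, so callers need no if/else.
    class RegionTimer {
      std::string Name;
      bool Enabled;
      std::chrono::steady_clock::time_point Start;
    public:
      RegionTimer(const std::string &N, bool Enable)
          : Name(N), Enabled(Enable) {
        if (Enabled) Start = std::chrono::steady_clock::now();
      }
      ~RegionTimer() {
        if (!Enabled) return;
        auto US = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::steady_clock::now() - Start).count();
        std::fprintf(stderr, "%s: %lld us\n", Name.c_str(), (long long)US);
      }
    };

    void emitFunctionDebugInfo(bool TimePassesIsEnabled) {
      // One declaration replaces the duplicated if/else around each call.
      RegionTimer T("DWARF Debug Writer", TimePassesIsEnabled);
      // DD->beginFunction(MF);  // the timed work goes here
    }

The design point is that disabling timing costs only a branch per region, while
every call site shrinks from six lines to two.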
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index f92127f..c8a63cf 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -52,13 +52,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); SymName += "__"; SymName += Id; - + // Capitalize the first letter of the module name. SymName[Letter] = toupper(SymName[Letter]); - + SmallString<128> TmpStr; AP.Mang->getNameWithPrefix(TmpStr, SymName); - + MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr); AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 9dec22e..7f98df0 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -358,23 +358,10 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything -/// after it, replacing it with an unconditional branch to NewDest. This -/// returns true if OldInst's block is modified, false if NewDest is modified. +/// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { - MachineBasicBlock *OldBB = OldInst->getParent(); - - // Remove all the old successors of OldBB from the CFG. - while (!OldBB->succ_empty()) - OldBB->removeSuccessor(OldBB->succ_begin()); - - // Remove all the dead instructions from the end of OldBB. - OldBB->erase(OldInst, OldBB->end()); - - // If OldBB isn't immediately before OldBB, insert a branch to it. - if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest)) - TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>()); - OldBB->addSuccessor(NewDest); + TII->ReplaceTailWithBranchTo(OldInst, NewDest); ++NumTailMerge; } @@ -383,6 +370,9 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return 0; + MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. @@ -443,18 +433,20 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; + DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>()); + TII->InsertBranch(*CurMBB, SuccBB, NULL, + SmallVector<MachineOperand, 0>(), dl); } bool @@ -625,9 +617,10 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. 
-unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength) { - unsigned commonTailIndex = 0; +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; unsigned TimeEstimate = ~0U; for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { // Use PredBB if possible; that doesn't require a new branch. @@ -655,6 +648,11 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + SameTails[commonTailIndex].setBlock(newMBB); SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); @@ -662,7 +660,7 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, if (PredBB == MBB) PredBB = newMBB; - return commonTailIndex; + return true; } // See if any of the blocks in MergePotentials (which all have a common single @@ -757,7 +755,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } } MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); @@ -874,10 +876,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { } // Remove the unconditional branch at the end, if any. if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } @@ -976,6 +979,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); + DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1027,7 +1031,7 @@ ReoptimizeBlock: TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1066,7 +1070,7 @@ ReoptimizeBlock: // the condition is false, remove the uncond second branch. 
if (PriorFBB == MBB) { TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1079,7 +1083,7 @@ ReoptimizeBlock: SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1116,7 +1120,7 @@ ReoptimizeBlock: << "To make fallthrough to: " << *PriorTBB << "\n"); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(--MF.end()); @@ -1145,7 +1149,7 @@ ReoptimizeBlock: SmallVector<MachineOperand, 4> NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1200,7 +1204,7 @@ ReoptimizeBlock: PriorFBB = MBB; } TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1226,7 +1230,7 @@ ReoptimizeBlock: if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1246,7 +1250,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. 
- TII->InsertBranch(*MBB, CurTBB, 0, CurCond); + TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); } } @@ -1286,7 +1290,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); } MBB->moveAfter(PredBB); MadeChange = true; diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index b087395..15dfa7f 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -102,8 +102,9 @@ namespace llvm { MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); - unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength); + bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex); bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 3e38872..ffeff1e 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,19 +1,20 @@ add_llvm_library(LLVMCodeGen - Analysis.cpp AggressiveAntiDepBreaker.cpp + Analysis.cpp BranchFolding.cpp CalcSpillWeights.cpp + CallingConvLower.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp ELFCodeEmitter.cpp ELFWriter.cpp - ExactHazardRecognizer.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp IfConversion.cpp + InlineSpiller.cpp IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp @@ -45,6 +46,7 @@ add_llvm_library(LLVMCodeGen OptimizePHIs.cpp PHIElimination.cpp Passes.cpp + PostRAHazardRecognizer.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp ProcessImplicitDefs.cpp @@ -52,7 +54,6 @@ add_llvm_library(LLVMCodeGen PseudoSourceValue.cpp RegAllocFast.cpp RegAllocLinearScan.cpp - RegAllocLocal.cpp RegAllocPBQP.cpp RegisterCoalescer.cpp RegisterScavenging.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index a328d0e..240a7b9 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -116,7 +116,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { SmallVector<LiveInterval*, 4> spillIs; if (lis->isReMaterializable(li, spillIs, isLoad)) { // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are + // it is a preferred candidate for spilling. If none of the defs are // loads, then it's potentially very cheap to re-materialize. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 4e6c1fc..62ad817 100644 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -80,13 +80,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, /// CheckReturn - Analyze the return values of a function, returning true if /// the return can be performed without sret-demotion, and false otherwise. -bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, +bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { // Determine which register each value should be copied into. 
- for (unsigned i = 0, e = OutTys.size(); i != e; ++i) { - EVT VT = OutTys[i]; - ISD::ArgFlagsTy ArgFlags = ArgsFlags[i]; + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) return false; } @@ -99,7 +98,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { // Determine which register each value should be copied into. for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].Val.getValueType(); + EVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG @@ -111,14 +110,13 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, } } - /// AnalyzeCallOperands - Analyze the outgoing arguments to a call, /// incorporating info about the passed values into this state. void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = Outs[i].Val.getValueType(); + EVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 3ff2a04..e0e315c 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -178,6 +178,8 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, continue; // Move the block. + DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() + << " to top of loop.\n"); Changed = true; // Move it and all the blocks that can reach it via fallthrough edges @@ -297,6 +299,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, continue; // Move the block. + DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber() + << " to be contiguous with loop.\n"); Changed = true; // Process this block and all loop blocks contiguous with it, to keep diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index fd957b1..e3746a9 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -29,6 +30,7 @@ CriticalAntiDepBreaker:: CriticalAntiDepBreaker(MachineFunction& MFi) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)) { @@ -71,25 +73,27 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. 
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
          SE = BB->succ_end(); SI != SE; ++SI)
-      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+    for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
            E = (*SI)->livein_end(); I != E; ++I) {
-        unsigned Reg = *I;
-        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-        KillIndices[Reg] = BB->size();
-        DefIndices[Reg] = ~0u;
-        // Repeat, for all aliases.
-        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-          unsigned AliasReg = *Alias;
-          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-          KillIndices[AliasReg] = BB->size();
-          DefIndices[AliasReg] = ~0u;
-        }
+      unsigned Reg = *I;
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      KillIndices[Reg] = BB->size();
+      DefIndices[Reg] = ~0u;
+      // Repeat, for all aliases.
+      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+        unsigned AliasReg = *Alias;
+        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        KillIndices[AliasReg] = BB->size();
+        DefIndices[AliasReg] = ~0u;
      }
-    }
+    }
 
   // Mark live-out callee-saved registers. In a return block this is
   // all callee-saved registers. In non-return this is any
@@ -164,6 +168,26 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
 }
 
 void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+  // It's not safe to change register allocation for source operands of
+  // instructions that have special allocation requirements. Also assume all
+  // registers used in a call must not be changed (ABI).
+  // FIXME: The issue with predicated instructions is more complex. We are
+  // being conservative here because the kill markers cannot be trusted after
+  // if-conversion:
+  // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+  // ...
+  // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+  // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+  // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+  //
+  // The first R6 kill is not really a kill since it's killed by a predicated
+  // instruction which may not be executed. The second R6 def may or may not
+  // re-define R6 so it's not safe to change it since the last R6 use cannot be
+  // changed.
+  bool Special = MI->getDesc().isCall() ||
+    MI->getDesc().hasExtraSrcRegAllocReq() ||
+    TII->isPredicated(MI);
+
   // Scan the register operands for this instruction and update
   // Classes and RegRefs.
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -199,9 +223,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
     if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
       RegRefs.insert(std::make_pair(Reg, &MO));
 
-    // It's not safe to change register allocation for source operands of
-    // instructions that have special allocation requirements.
-    if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
+    if (MO.isUse() && Special) {
       if (KeepRegs.insert(Reg)) {
         for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
              *Subreg; ++Subreg)
@@ -216,38 +238,43 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
   // Update liveness.
   // Proceeding upwards, registers that are defined but not used in this
   // instruction are now dead.
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (!MO.isReg()) continue;
-    unsigned Reg = MO.getReg();
-    if (Reg == 0) continue;
-    if (!MO.isDef()) continue;
-    // Ignore two-addr defs.
- if (MI->isRegTiedToUseOperand(i)) continue; - - DefIndices[Reg] = Count; - KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); - Classes[Reg] = 0; - RegRefs.erase(Reg); - // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; - DefIndices[SubregReg] = Count; - KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); - Classes[SubregReg] = 0; - RegRefs.erase(SubregReg); - } - // Conservatively mark super-registers as unusable. - for (const unsigned *Super = TRI->getSuperRegisters(Reg); - *Super; ++Super) { - unsigned SuperReg = *Super; - Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); + + if (!TII->isPredicated(MI)) { + // Predicated defs are modeled as read + write, i.e. similar to two + // address updates. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + DefIndices[Reg] = Count; + KillIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.erase(Reg); + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + KeepRegs.erase(SubregReg); + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1); + } } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -334,10 +361,15 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; + // Keep a map of the MachineInstr*'s back to the SUnit representing them. + // This is used for updating debug information. + DenseMap<MachineInstr*,const SUnit*> MISUnitMap; + // Find the node at the bottom of the critical path. const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; + MISUnitMap[SU->getInstr()] = SU; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } @@ -473,7 +505,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, PrescanInstruction(MI); - if (MI->getDesc().hasExtraDefRegAllocReq()) + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. 
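// (Here AntiDepReg == 0 serves as the breaker's "no candidate" sentinel:
// register number 0 means no register, so the assignment below simply
// abandons any rename for this instruction rather than risk violating the
// ABI or predication constraints named above.)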
AntiDepReg = 0; @@ -485,7 +521,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (MO.isUse() && AntiDepReg == Reg) { + if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; break; } @@ -519,8 +555,22 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, std::multimap<unsigned, MachineOperand *>::iterator> Range = RegRefs.equal_range(AntiDepReg); for (std::multimap<unsigned, MachineOperand *>::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) + Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second->setReg(NewReg); + // If the SU for the instruction being updated has debug information + // related to the anti-dependency register, make sure to update that + // as well. + const SUnit *SU = MISUnitMap[Q->second->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } + } // We just went back in time and modified history; the // liveness information for the anti-depenence reg is now diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index cc42dd2..5406300 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -22,15 +22,18 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { +class TargetInstrInfo; +class TargetRegisterInfo; + class CriticalAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index f6739f4..01b31b4 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -87,10 +88,13 @@ namespace { /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still /// use the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. - bool CleanupSelectors(); + bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels); + + bool HasCatchAllInSelector(IntrinsicInst *); /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. - void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels); + void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, + SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels); /// FindAllURoRInvokes - Find all URoR invokes in the function. 
void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes); @@ -150,7 +154,7 @@ namespace { Changed = true; } - return false; + return Changed; } public: @@ -186,25 +190,32 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { return new DwarfEHPrepare(tm, fast); } +/// HasCatchAllInSelector - Return true if the intrinsic instruction has a +/// catch-all. +bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) { + if (!EHCatchAllValue) return false; + + unsigned ArgIdx = II->getNumArgOperands() - 1; + GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx)); + return GV == EHCatchAllValue; +} + /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. void DwarfEHPrepare:: -FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { +FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, + SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) { for (Value::use_iterator I = SelectorIntrinsic->use_begin(), E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *SI = cast<IntrinsicInst>(I); - if (!SI || SI->getParent()->getParent() != F) continue; - - unsigned NumOps = SI->getNumOperands(); - if (NumOps > 4) continue; - bool IsCleanUp = (NumOps == 3); + IntrinsicInst *II = cast<IntrinsicInst>(I); - if (!IsCleanUp) - if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getOperand(3))) - IsCleanUp = (CI->getZExtValue() == 0); + if (II->getParent()->getParent() != F) + continue; - if (IsCleanUp) - Sels.insert(SI); + if (!HasCatchAllInSelector(II)) + Sels.insert(II); + else + CatchAllSels.insert(II); } } @@ -222,7 +233,7 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use /// the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. -bool DwarfEHPrepare::CleanupSelectors() { +bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { if (!EHCatchAllValue) return false; if (!SelectorIntrinsic) { @@ -232,17 +243,15 @@ bool DwarfEHPrepare::CleanupSelectors() { } bool Changed = false; - for (Value::use_iterator - I = SelectorIntrinsic->use_begin(), - E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(I); - if (!Sel || Sel->getParent()->getParent() != F) continue; + for (SmallPtrSet<IntrinsicInst*, 32>::iterator + I = Sels.begin(), E = Sels.end(); I != E; ++I) { + IntrinsicInst *Sel = *I; // Index of the ".llvm.eh.catch.all.value" variable. - unsigned OpIdx = Sel->getNumOperands() - 1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getOperand(OpIdx)); + unsigned OpIdx = Sel->getNumArgOperands() - 1; + GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx)); if (GV != EHCatchAllValue) continue; - Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer()); + Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer()); Changed = true; } @@ -293,8 +302,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, /// function. This is a candidate to merge the selector associated with the URoR /// invoke with the one from the URoR's landing pad. bool DwarfEHPrepare::HandleURoRInvokes() { - if (!DT) return CleanupSelectors(); // We require DominatorTree information. 
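// The clean-up/catch-all split above rests on one property of the rewritten
// llvm.eh.selector calls: a catch-all selector carries the
// ".llvm.eh.catch.all.value" global as its final argument. A minimal sketch
// of the test, mirroring HasCatchAllInSelector from this patch (the helper
// name here is illustrative only):
static bool hasCatchAllMarker(IntrinsicInst *II,
                              GlobalVariable *EHCatchAllValue) {
  unsigned LastArg = II->getNumArgOperands() - 1;
  // A non-global final argument yields a null dyn_cast, which compares
  // unequal to the catch-all global.
  return dyn_cast<GlobalVariable>(II->getArgOperand(LastArg)) ==
         EHCatchAllValue;
}
// Selectors without the marker are queued in Sels as clean-ups; those with
// it go to CatchAllSels and are later rewritten by CleanupSelectors.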
- if (!EHCatchAllValue) { EHCatchAllValue = F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value"); @@ -307,14 +314,20 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!SelectorIntrinsic) return false; } + SmallPtrSet<IntrinsicInst*, 32> Sels; + SmallPtrSet<IntrinsicInst*, 32> CatchAllSels; + FindAllCleanupSelectors(Sels, CatchAllSels); + + if (!DT) + // We require DominatorTree information. + return CleanupSelectors(CatchAllSels); + if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(); + if (!URoR) return CleanupSelectors(CatchAllSels); } - SmallPtrSet<IntrinsicInst*, 32> Sels; SmallPtrSet<InvokeInst*, 32> URoRInvokes; - FindAllCleanupSelectors(Sels); FindAllURoRInvokes(URoRInvokes); SmallPtrSet<IntrinsicInst*, 32> SelsToConvert; @@ -340,7 +353,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!ExceptionValueIntrinsic) { ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); - if (!ExceptionValueIntrinsic) return CleanupSelectors(); + if (!ExceptionValueIntrinsic) + return CleanupSelectors(CatchAllSels); } for (Value::use_iterator @@ -360,21 +374,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() { // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we // need to convert it to a 'catch-all'. for (SmallPtrSet<IntrinsicInst*, 8>::iterator - SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) { - IntrinsicInst *II = *SI; - unsigned NumOps = II->getNumOperands(); - - if (NumOps <= 4) { - bool IsCleanUp = (NumOps == 3); - - if (!IsCleanUp) - if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(3))) - IsCleanUp = (CI->getZExtValue() == 0); - - if (IsCleanUp) - SelsToConvert.insert(II); - } - } + SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) + if (!HasCatchAllInSelector(*SI)) + SelsToConvert.insert(*SI); } } } @@ -388,12 +390,22 @@ bool DwarfEHPrepare::HandleURoRInvokes() { SI = SelsToConvert.begin(), SE = SelsToConvert.end(); SI != SE; ++SI) { IntrinsicInst *II = *SI; - SmallVector<Value*, 8> Args; // Use the exception object pointer and the personality function // from the original selector. - Args.push_back(II->getOperand(1)); // Exception object pointer. - Args.push_back(II->getOperand(2)); // Personality function. + CallSite CS(II); + IntrinsicInst::op_iterator I = CS.arg_begin(); + IntrinsicInst::op_iterator E = CS.arg_end(); + IntrinsicInst::op_iterator B = prior(E); + + // Exclude last argument if it is an integer. + if (isa<ConstantInt>(B)) E = B; + + // Add exception object pointer (front). + // Add personality function (next). + // Add in any filter IDs (rest). + SmallVector<Value*, 8> Args(I, E); + Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. 
CallInst *NewSelector = @@ -409,7 +421,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { } } - Changed |= CleanupSelectors(); + Changed |= CleanupSelectors(CatchAllSels); return Changed; } diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 8416d3b..36b0e65 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -90,7 +90,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(), MRE = JTRelocations.end(); MRI != MRE; ++MRI) { MachineRelocation &MR = *MRI; - unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); + uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setResultPointer((void*)MBBOffset); MR.setConstantVal(ES->SectionIdx); JTSection.addRelocation(MR); diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h deleted file mode 100644 index 91c81a9..0000000 --- a/lib/CodeGen/ExactHazardRecognizer.h +++ /dev/null @@ -1,86 +0,0 @@ -//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ExactHazardRecognizer class, which -// implements hazard-avoidance heuristics for scheduling, based on the -// scheduling itineraries specified for the target. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H -#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetInstrItineraries.h" - -namespace llvm { - class ExactHazardRecognizer : public ScheduleHazardRecognizer { - // ScoreBoard to track function unit usage. ScoreBoard[0] is a - // mask of the FUs in use in the cycle currently being - // schedule. ScoreBoard[1] is a mask for the next cycle. The - // ScoreBoard is used as a circular buffer with the current cycle - // indicated by Head. - class ScoreBoard { - unsigned *Data; - - // The maximum number of cycles monitored by the Scoreboard. This - // value is determined based on the target itineraries to ensure - // that all hazards can be tracked. - size_t Depth; - // Indices into the Scoreboard that represent the current cycle. - size_t Head; - public: - ScoreBoard():Data(NULL), Depth(0), Head(0) { } - ~ScoreBoard() { - delete[] Data; - } - - size_t getDepth() const { return Depth; } - unsigned& operator[](size_t idx) const { - assert(Depth && "ScoreBoard was not initialized properly!"); - - return Data[(Head + idx) % Depth]; - } - - void reset(size_t d = 1) { - if (Data == NULL) { - Depth = d; - Data = new unsigned[Depth]; - } - - memset(Data, 0, Depth * sizeof(Data[0])); - Head = 0; - } - - void advance() { - Head = (Head + 1) % Depth; - } - - // Print the scoreboard. - void dump() const; - }; - - // Itinerary data for the target. 
- const InstrItineraryData &ItinData; - - ScoreBoard ReservedScoreboard; - ScoreBoard RequiredScoreboard; - - public: - ExactHazardRecognizer(const InstrItineraryData &ItinData); - - virtual HazardType getHazardType(SUnit *SU); - virtual void Reset(); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - }; -} - -#endif diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 790cb21..71506cc 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -271,7 +271,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcwrite: if (LowerWr) { // Replace a write barrier with a simple store. - Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI); + Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); CI->replaceAllUsesWith(St); CI->eraseFromParent(); } @@ -279,7 +279,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcread: if (LowerRd) { // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getOperand(2), "", CI); + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); Ld->takeName(CI); CI->replaceAllUsesWith(Ld); CI->eraseFromParent(); @@ -290,7 +290,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { // Initialize the GC root, but do not delete the intrinsic. The // backend needs the intrinsic to flag the stack slot. Roots.push_back(cast<AllocaInst>( - CI->getOperand(1)->stripPointerCasts())); + CI->getArgOperand(0)->stripPointerCasts())); } break; default: diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index c61fd17..6b445e0 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -33,20 +34,22 @@ using namespace llvm; static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); -static cl::opt<bool> DisableSimple("disable-ifcvt-simple", +static cl::opt<bool> DisableSimple("disable-ifcvt-simple", cl::init(false), cl::Hidden); -static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false", +static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false", cl::init(false), cl::Hidden); -static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle", +static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle", cl::init(false), cl::Hidden); -static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev", +static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev", cl::init(false), cl::Hidden); -static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false", +static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false", cl::init(false), cl::Hidden); -static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", +static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", cl::init(false), cl::Hidden); -static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", +static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold", 
+ cl::init(true), cl::Hidden);

STATISTIC(NumSimple, "Number of simple if-conversions performed");
STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
@@ -115,7 +118,7 @@ namespace {
 BB(0), TrueBB(0), FalseBB(0) {}
 };

- /// IfcvtToken - Record information about pending if-conversions to attemp:
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
 /// BBI - Corresponding BBInfo.
 /// Kind - Type of block. See IfcvtKind.
 /// NeedSubsumption - True if the to-be-predicated BB has already been
@@ -146,6 +149,7 @@ namespace {
 const TargetLowering *TLI;
 const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
 bool MadeChange;
 int FnNum;
 public:
@@ -167,8 +171,7 @@ namespace {
 std::vector<IfcvtToken*> &Tokens);
 bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
 bool isTriangle = false, bool RevBranch = false);
- bool AnalyzeBlocks(MachineFunction &MF,
- std::vector<IfcvtToken*> &Tokens);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
 void InvalidatePreds(MachineBasicBlock *BB);
 void RemoveExtraEdges(BBInfo &BBI);
 bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
@@ -177,14 +180,22 @@ namespace {
 unsigned NumDups1, unsigned NumDups2);
 void PredicateBlock(BBInfo &BBI,
 MachineBasicBlock::iterator E,
- SmallVectorImpl<MachineOperand> &Cond);
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs);
 void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
 SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
 bool IgnoreBr = false);
- void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);

- bool MeetIfcvtSizeLimit(unsigned Size) const {
- return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const {
+ return Size > 0 && TII->isProfitableToIfCvt(BB, Size);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
+ MachineBasicBlock &FBB, unsigned FSize) const {
+ return TSize > 0 && FSize > 0 &&
+ TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize);
 }

 // blockAlwaysFallThrough - Block ends without a terminator.
@@ -227,8 +238,15 @@ FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
 TLI = MF.getTarget().getTargetLowering();
 TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
 if (!TII) return false;

+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true);
+ bool BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+
 DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
 << MF.getFunction()->getName() << "\'");
@@ -253,7 +271,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
 while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
 // Do an initial analysis for each basic block and find all the potential
 // candidates to perform if-conversion.
- bool Change = AnalyzeBlocks(MF, Tokens);
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
 while (!Tokens.empty()) {
 IfcvtToken *Token = Tokens.back();
 Tokens.pop_back();
@@ -281,7 +300,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
 case ICSimpleFalse: {
 bool isFalse = Kind == ICSimpleFalse;
 if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
- DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
" false" :"") + DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? + " false" : "") << "): BB#" << BBI.BB->getNumber() << " (" << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() @@ -289,8 +309,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { RetVal = IfConvertSimple(BBI, Kind); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { - if (isFalse) NumSimpleFalse++; - else NumSimple++; + if (isFalse) ++NumSimpleFalse; + else ++NumSimple; } break; } @@ -316,11 +336,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { - if (isRev) NumTriangleFRev++; - else NumTriangleFalse++; + if (isRev) ++NumTriangleFRev; + else ++NumTriangleFalse; } else { - if (isRev) NumTriangleRev++; - else NumTriangle++; + if (isRev) ++NumTriangleRev; + else ++NumTriangle; } } break; @@ -332,7 +352,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); - if (RetVal) NumDiamonds++; + if (RetVal) ++NumDiamonds; break; } } @@ -361,13 +381,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { Roots.clear(); BBAnalysis.clear(); - if (MadeChange) { + if (MadeChange && IfCvtBranchFold) { BranchFolder BF(false); BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); } + MadeChange |= BFChange; return MadeChange; } @@ -387,9 +408,10 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, /// ReverseBranchCondition - Reverse the condition of the end of the block /// branch. Swap block's 'true' and 'false' successors. bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + DebugLoc dl; // FIXME: this is nowhere if (!TII->ReverseBranchCondition(BBI.BrCond)) { TII->RemoveBranch(*BBI.BB); - TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); std::swap(BBI.TrueBB, BBI.FalseBB); return true; } @@ -420,7 +442,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || - TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit()) + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize)) return false; Dups = TrueBBI.NonPredSize; } @@ -431,7 +453,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { /// ValidTriangle - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid triangle shape for ifcvt. /// If 'FalseBranch' is true, it checks if 'true' block's false branch -/// branches to the false branch rather than the other way around. It also +/// branches to the 'false' block rather than the other way around. It also /// returns the number of instructions that the ifcvt would need to duplicate /// if performed in 'Dups'. 
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -457,7 +479,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (Size > TLI->getIfCvtDupBlockSizeLimit()) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size)) return false; Dups = Size; } @@ -514,7 +536,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); - while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) { + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + // Skip dbg_value instructions + while (TI != TIE && TI->isDebugValue()) + ++TI; + while (FI != FIE && FI->isDebugValue()) + ++FI; + while (TI != TIE && FI != FIE) { + // Skip dbg_value instructions. These do not count. + if (TI->isDebugValue()) { + while (TI != TIE && TI->isDebugValue()) + ++TI; + if (TI == TIE) + break; + } + if (FI->isDebugValue()) { + while (FI != FIE && FI->isDebugValue()) + ++FI; + if (FI == FIE) + break; + } if (!TI->isIdenticalTo(FI)) break; ++Dups1; @@ -524,7 +566,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, TI = firstNonBranchInst(TrueBBI.BB, TII); FI = firstNonBranchInst(FalseBBI.BB, TII); - while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) { + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + // Skip dbg_value instructions at end of the bb's. + while (TI != TIB && TI->isDebugValue()) + --TI; + while (FI != FIB && FI->isDebugValue()) + --FI; + while (TI != TIB && FI != FIB) { + // Skip dbg_value instructions. These do not count. + if (TI->isDebugValue()) { + while (TI != TIB && TI->isDebugValue()) + --TI; + if (TI == TIB) + break; + } + if (FI->isDebugValue()) { + while (FI != FIB && FI->isDebugValue()) + --FI; + if (FI == FIB) + break; + } if (!TI->isIdenticalTo(FI)) break; ++Dups2; @@ -556,7 +618,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // No false branch. This BB must end with a conditional branch and a // fallthrough. if (!BBI.FalseBB) - BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); if (!BBI.FalseBB) { // Malformed bcc? True and false blocks are the same? 
BBI.IsUnpredicable = true; @@ -569,6 +631,9 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { + if (I->isDebugValue()) + continue; + const TargetInstrDesc &TID = I->getDesc(); if (TID.isNotDuplicable()) BBI.CannotBeCopied = true; @@ -702,8 +767,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2), + *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -720,7 +785,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, } if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -732,23 +797,23 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); Enqueued = true; } - + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } if (ValidSimple(TrueBBI, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB // | \_ // | | // | TBB---> exit - // | + // | // FBB Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); Enqueued = true; @@ -757,21 +822,21 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } if (ValidSimple(FalseBBI, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -785,11 +850,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, } /// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion -/// candidates. It returns true if any CFG restructuring is done to expose more -/// if-conversion opportunities. -bool IfConverter::AnalyzeBlocks(MachineFunction &MF, +/// candidates. 
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
 std::vector<IfcvtToken*> &Tokens) {
- bool Change = false;
 std::set<MachineBasicBlock*> Visited;
 for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
 for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
@@ -801,20 +864,23 @@ bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
 // Sort to favor more complex ifcvt scheme.
 std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
-
- return Change;
}

/// canFallThroughTo - Returns true either if ToBB is the next block after BB or
/// that all the intervening blocks are empty (given BB can fall through to its
/// next block).
static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
- MachineFunction::iterator I = BB;
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
 MachineFunction::iterator TI = ToBB;
 MachineFunction::iterator E = BB->getParent()->end();
- while (++I != TI)
- if (I == E || !I->empty())
+ while (I != TI) {
+ // Check isSuccessor to avoid the case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
 return false;
+ PI = I++;
+ }
 return true;
}
@@ -836,8 +902,9 @@ void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
///
static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
 const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
 SmallVector<MachineOperand, 0> NoCond;
- TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
}

/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
@@ -849,6 +916,66 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
 BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
}

+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Redefs.insert(*Subreg);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.erase(*SR);
+ }
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (Redefs.count(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
 true/*IsImp*/,false/*IsKill*/));
+ } else {
+ Redefs.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.insert(*SR);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
///
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
@@ -873,13 +1000,19 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
 if (TII->ReverseBranchCondition(Cond))
 assert(false && "Unable to reverse branch condition!");

+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
 if (CvtBBI->BB->pred_size() > 1) {
 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
 // Copy instructions in the true block, predicate them, and add them to
 // the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
 } else {
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);

 // Merge converted block into entry block.
 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
@@ -922,6 +1055,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
 BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
 BBInfo *CvtBBI = &TrueBBI;
 BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere

 SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
 if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
@@ -957,21 +1091,26 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
 }
 }

+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
 bool HasEarlyExit = CvtBBI->FalseBB != NULL;
- bool DupBB = CvtBBI->BB->pred_size() > 1;
- if (DupBB) {
+ if (CvtBBI->BB->pred_size() > 1) {
 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
 // Copy instructions in the true block, predicate them, and add them to
 // the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
 } else {
 // Predicate the 'true' block after removing its branch.
 CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);

 // Now merge the entry of the triangle with the true block.
 BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
- MergeBlocks(BBI, *CvtBBI);
+ MergeBlocks(BBI, *CvtBBI, false);
 }

 // If 'true' block has a 'false' successor, add an exit branch to it.
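// The Redefs set threaded through IfConvertSimple and IfConvertTriangle
// enforces a single rule: once a block is predicated, a def of an
// already-live register may or may not execute, so the prior value has to
// stay live. A condensed sketch of the per-def decision, reusing the calls
// from UpdatePredRedefs above (the helper name is illustrative only):
static void notePredicatedDef(SmallSet<unsigned, 4> &Redefs, MachineInstr *MI,
                              unsigned Reg, const TargetRegisterInfo *TRI) {
  if (Redefs.count(Reg)) {
    // Conditional redefinition: model it as read + write by adding an
    // implicit use, so the old value remains live across this def.
    MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
                                             true/*IsImp*/));
  } else {
    // First def on this path: the register and its subregs become live here.
    Redefs.insert(Reg);
    for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
      Redefs.insert(*SR);
  }
}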
@@ -980,7 +1119,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) assert(false && "Unable to reverse branch condition!"); - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); } @@ -1009,7 +1148,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { RemoveExtraEdges(BBI); // Update block info. BB can be iteratively if-converted. - if (!IterIfcvt) + if (!IterIfcvt) BBI.IsDone = true; InvalidatePreds(BBI.BB); CvtBBI->IsDone = true; @@ -1044,9 +1183,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return false; } - // Merge the 'true' and 'false' blocks by copying the instructions - // from the 'false' block to the 'true' block. That is, unless the true - // block would clobber the predicate, in that case, do the opposite. + // Put the predicated instructions from the 'true' block before the + // instructions from the 'false' block, unless the true block would clobber + // the predicate, in which case, do the opposite. BBInfo *BBI1 = &TrueBBI; BBInfo *BBI2 = &FalseBBI; SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); @@ -1071,39 +1210,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Remove the conditional branch from entry to the blocks. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet<unsigned, 4> Redefs; + InitPredRedefs(BBI1->BB, Redefs, TRI); + // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); MachineBasicBlock::iterator DI2 = BBI2->BB->begin(); + MachineBasicBlock::iterator DIE1 = BBI1->BB->end(); + MachineBasicBlock::iterator DIE2 = BBI2->BB->end(); + // Skip dbg_value instructions + while (DI1 != DIE1 && DI1->isDebugValue()) + ++DI1; + while (DI2 != DIE2 && DI2->isDebugValue()) + ++DI2; BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; + + // Skip past the dups on each side separately since there may be + // differing dbg_value entries. + for (unsigned i = 0; i < NumDups1; ++DI1) { + if (!DI1->isDebugValue()) + ++i; + } while (NumDups1 != 0) { - ++DI1; ++DI2; - --NumDups1; + if (!DI2->isDebugValue()) + --NumDups1; } + + UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); // Predicate the 'true' block after removing its branch. BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); DI1 = BBI1->BB->end(); - for (unsigned i = 0; i != NumDups2; ++i) + for (unsigned i = 0; i != NumDups2; ) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI1 != BBI1->BB->begin()); --DI1; + // skip dbg_value instructions + if (!DI1->isDebugValue()) + ++i; + } BBI1->BB->erase(DI1, BBI1->BB->end()); - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); // Predicate the 'false' block. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. 
+ assert (DI2 != BBI2->BB->begin()); --DI2; - --NumDups2; + // skip dbg_value instructions + if (!DI2->isDebugValue()) + --NumDups2; } - PredicateBlock(*BBI2, DI2, *Cond2); + PredicateBlock(*BBI2, DI2, *Cond2, Redefs); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1); - MergeBlocks(BBI, *BBI2); + MergeBlocks(BBI, *BBI1, TailBB == 0); + MergeBlocks(BBI, *BBI2, TailBB == 0); // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1111,16 +1283,32 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // tail, add a unconditional branch to it. if (TailBB) { BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; - if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) { - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + bool CanMergeTail = !TailBBI.HasFallThrough; + // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; + // check if there are any other predecessors besides those. + unsigned NumPreds = TailBB->pred_size(); + if (NumPreds > 1) + CanMergeTail = false; + else if (NumPreds == 1 && CanMergeTail) { + MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); + if (*PI != BBI1->BB && *PI != BBI2->BB) + CanMergeTail = false; + } + if (CanMergeTail) { MergeBlocks(BBI, TailBBI); TailBBI.IsDone = true; } else { + BBI.BB->addSuccessor(TailBB); InsertUncondBranch(BBI.BB, TailBB, TII); BBI.HasFallThrough = false; } } + // RemoveExtraEdges won't work if the block has an unanalyzable branch, + // which can happen here if TailBB is unanalyzable and is merged, so + // explicitly remove BBI1 and BBI2 as successors. + BBI.BB->removeSuccessor(BBI1->BB); + BBI.BB->removeSuccessor(BBI2->BB); RemoveExtraEdges(BBI); // Update block info. @@ -1135,9 +1323,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, /// specified end with the specified condition. void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, - SmallVectorImpl<MachineOperand> &Cond) { + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs) { for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { - if (TII->isPredicated(I)) + if (I->isDebugValue() || TII->isPredicated(I)) continue; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG @@ -1145,6 +1334,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI, #endif llvm_unreachable(0); } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(I, Redefs, TRI, true); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1152,48 +1345,55 @@ void IfConverter::PredicateBlock(BBInfo &BBI, BBI.IsAnalyzed = false; BBI.NonPredSize = 0; - NumIfConvBBs++; + ++NumIfConvBBs; } /// CopyAndPredicateBlock - Copy and predicate instructions from source BB to /// the destination block. Skip end of block branches if IgnoreBr is true. void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { const TargetInstrDesc &TID = I->getDesc(); - bool isPredicated = TII->isPredicated(I); // Do not copy the end of the block branches. 
- if (IgnoreBr && !isPredicated && TID.isBranch()) + if (IgnoreBr && TID.isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - if (!isPredicated) + if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(MI, Redefs, TRI, true); } - std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); - MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); - MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + if (!IgnoreBr) { + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; - for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = Succs[i]; - // Fallthrough edge can't be transferred. - if (Succ == FallThrough) - continue; - ToBBI.BB->addSuccessor(Succ); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } } std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), @@ -1203,25 +1403,18 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.IsAnalyzed = false; - NumDupBBs++; + ++NumDupBBs; } /// MergeBlocks - Move all instructions from FromBB to the end of ToBB. -/// -void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { +/// This will leave FromBB as an empty block, so remove all of its +/// successor edges except for the fall-through edge. If AddEdges is true, +/// i.e., when FromBBI's branch is being moved, add those successor edges to +/// ToBBI. +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); - // Redirect all branches to FromBB to ToBB. - std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(), - FromBBI.BB->pred_end()); - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - MachineBasicBlock *Pred = Preds[i]; - if (Pred == ToBBI.BB) - continue; - Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB); - } - std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); @@ -1233,7 +1426,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { if (Succ == FallThrough) continue; FromBBI.BB->removeSuccessor(Succ); - ToBBI.BB->addSuccessor(Succ); + if (AddEdges) + ToBBI.BB->addSuccessor(Succ); } // Now FromBBI always falls through to the next block! diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp new file mode 100644 index 0000000..12adcaa --- /dev/null +++ b/lib/CodeGen/InlineSpiller.cpp @@ -0,0 +1,408 @@ +//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// The inline spiller modifies the machine function directly instead of +// inserting spills and restores in VirtRegMap. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spiller" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class InlineSpiller : public Spiller { + MachineFunction &mf_; + LiveIntervals &lis_; + VirtRegMap &vrm_; + MachineFrameInfo &mfi_; + MachineRegisterInfo &mri_; + const TargetInstrInfo &tii_; + const TargetRegisterInfo &tri_; + const BitVector reserved_; + + // Variables that are valid during spill(), but used by multiple methods. + LiveInterval *li_; + std::vector<LiveInterval*> *newIntervals_; + const TargetRegisterClass *rc_; + int stackSlot_; + const SmallVectorImpl<LiveInterval*> *spillIs_; + + // Values of the current interval that can potentially remat. + SmallPtrSet<VNInfo*, 8> reMattable_; + + // Values in reMattable_ that failed to remat at some point. + SmallPtrSet<VNInfo*, 8> usedValues_; + + ~InlineSpiller() {} + +public: + InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) + : mf_(*mf), lis_(*lis), vrm_(*vrm), + mfi_(*mf->getFrameInfo()), + mri_(mf->getRegInfo()), + tii_(*mf->getTarget().getInstrInfo()), + tri_(*mf->getTarget().getRegisterInfo()), + reserved_(tri_.getReservedRegs(mf_)) {} + + void spill(LiveInterval *li, + std::vector<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex *earliestIndex); + +private: + bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, + SlotIndex UseIdx); + bool reMaterializeFor(MachineBasicBlock::iterator MI); + void reMaterializeAll(); + + bool foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl<unsigned> &Ops); + void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); +}; +} + +namespace llvm { +Spiller *createInlineSpiller(MachineFunction *mf, + LiveIntervals *lis, + const MachineLoopInfo *mli, + VirtRegMap *vrm) { + return new InlineSpiller(mf, lis, vrm); +} +} + +/// allUsesAvailableAt - Return true if all registers used by OrigMI at +/// OrigIdx are also available with the same value at UseIdx. +bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx) { + OrigIdx = OrigIdx.getUseIndex(); + UseIdx = UseIdx.getUseIndex(); + for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OrigMI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg) + continue; + // Reserved registers are OK. + if (MO.isUndef() || !lis_.hasInterval(MO.getReg())) + continue; + // We don't want to move any defs. + if (MO.isDef()) + return false; + // We cannot depend on virtual registers in spillIs_. They will be spilled. 
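// (If a rematerialized def read one of those registers, the remat would
// recreate a use of an interval the allocator has already decided to spill,
// reintroducing the very interference the spill is meant to remove.)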
+ for (unsigned si = 0, se = spillIs_->size(); si != se; ++si)
+ if ((*spillIs_)[si]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &LI = lis_.getInterval(MO.getReg());
+ const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != LI.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of
+/// reloading it.
+bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
+ VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
+ if (!OrigVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+ if (!reMattable_.count(OrigVNI)) {
+ DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
+ << UseIdx << '\t' << *MI);
+ return false;
+ }
+ MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
+ if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
+ usedValues_.insert(OrigVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes li_->reg, it had better not require the same
+ // register for uses and defs.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+ if (Writes) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
+ usedValues_.insert(OrigVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+ }
+ }
+
+ // Allocate a new register for the remat.
+ unsigned NewVReg = mri_.createVirtualRegister(rc_);
+ vrm_.grow();
+ LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ NewLI.markNotSpillable();
+ newIntervals_->push_back(&NewLI);
+
+ // Finally we can rematerialize OrigMI before MI.
+ MachineBasicBlock &MBB = *MI->getParent();
+ tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
+ MachineBasicBlock::iterator RematMI = MI;
+ SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI);
+
+ // Replace operands
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
+ MO.setReg(NewVReg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
+ lis_.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // Do a quick scan of the interval values to find if any are remattable.
+ reMattable_.clear(); + usedValues_.clear(); + for (LiveInterval::const_vni_iterator I = li_->vni_begin(), + E = li_->vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused() || !VNI->isDefAccurate()) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI)) + continue; + reMattable_.insert(VNI); + } + + // Often, no defs are remattable. + if (reMattable_.empty()) + return; + + // Try to remat before all uses of li_->reg. + bool anyRemat = false; + for (MachineRegisterInfo::use_nodbg_iterator + RI = mri_.use_nodbg_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(MI); + + if (!anyRemat) + return; + + // Remove any values that were completely rematted. + bool anyRemoved = false; + for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(), + E = reMattable_.end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->hasPHIKill() || usedValues_.count(VNI)) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); + lis_.RemoveMachineInstrFromMaps(DefMI); + vrm_.RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + li_->removeValNo(VNI); + anyRemoved = true; + } + + if (!anyRemoved) + return; + + // Removing values may cause debug uses where li_ is not live. + for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) { + if (!MI->isDebugValue()) + continue; + // Try to preserve the debug value if li_ is live immediately after it. + MachineBasicBlock::iterator NextMI = MI; + ++NextMI; + if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { + SlotIndex NearIdx = lis_.getInstructionIndex(NextMI); + if (li_->liveAt(NearIdx)) + continue; + } + DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); + MI->eraseFromParent(); + } +} + +/// foldMemoryOperand - Try folding stack slot references in Ops into MI. +/// Return true on success, and MI will be erased. +bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl<unsigned> &Ops) { + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied + // operands. + SmallVector<unsigned, 8> FoldOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + unsigned Idx = Ops[i]; + MachineOperand &MO = MI->getOperand(Idx); + if (MO.isImplicit()) + continue; + // FIXME: Teach targets to deal with subregs. + if (MO.getSubReg()) + return false; + // Tied use operands should not be passed to foldMemoryOperand. + if (!MI->isRegTiedToDefOperand(Idx)) + FoldOps.push_back(Idx); + } + + MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + if (!FoldMI) + return false; + lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + vrm_.addSpillSlotUse(stackSlot_, FoldMI); + MI->eraseFromParent(); + DEBUG(dbgs() << "\tfolded: " << *FoldMI); + return true; +} + +/// insertReload - Insert a reload of NewLI.reg before MI. +void InlineSpiller::insertReload(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + --MI; // Point to load instruction. 
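// (loadRegFromStackSlot inserts the reload immediately before MI, so
// stepping the iterator back one instruction lands on the new load; that is
// the instruction that must be indexed in the slot maps and recorded as a
// use of the spill slot below.)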
+ SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); +} + +/// insertSpill - Insert a spill of NewLI.reg after MI. +void InlineSpiller::insertSpill(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + --MI; // Point to store instruction. + SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); +} + +void InlineSpiller::spill(LiveInterval *li, + std::vector<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex *earliestIndex) { + DEBUG(dbgs() << "Inline spilling " << *li << "\n"); + assert(li->isSpillable() && "Attempting to spill already spilled value."); + assert(!li->isStackSlot() && "Trying to spill a stack slot."); + + li_ = li; + newIntervals_ = &newIntervals; + rc_ = mri_.getRegClass(li->reg); + spillIs_ = &spillIs; + + reMaterializeAll(); + + // Remat may handle everything. + if (li_->empty()) + return; + + stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + + // Iterate over instructions using register. + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) { + // Modify DBG_VALUE now that the value is in a spill slot. + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else { + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + MI->eraseFromParent(); + } + continue; + } + + // Analyze instruction. + bool Reads, Writes; + SmallVector<unsigned, 8> Ops; + tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops); + + // Attempt to fold memory ops. + if (foldMemoryOperand(MI, Ops)) + continue; + + // Allocate interval around instruction. + // FIXME: Infer regclass from instruction alone. + unsigned NewVReg = mri_.createVirtualRegister(rc_); + vrm_.grow(); + LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + NewLI.markNotSpillable(); + + if (Reads) + insertReload(NewLI, MI); + + // Rewrite instruction operands. + bool hasLiveDef = false; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + MO.setReg(NewVReg); + if (MO.isUse()) { + if (!MI->isRegTiedToDefOperand(Ops[i])) + MO.setIsKill(); + } else { + if (!MO.isDead()) + hasLiveDef = true; + } + } + + // FIXME: Use a second vreg if instruction has no tied ops. 
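+ // Only store the def back to the stack slot if it is still live after MI;
+ // dead defs need no spill.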
+ if (Writes && hasLiveDef) + insertSpill(NewLI, MI); + + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + newIntervals.push_back(&NewLI); + } +} diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 63bb5f2..03ae214 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -16,6 +16,7 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" @@ -314,21 +315,22 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname) { - switch (CI->getOperand(1)->getType()->getTypeID()) { + CallSite CS(CI); + switch (CI->getArgOperand(0)->getType()->getTypeID()) { default: llvm_unreachable("Invalid type in intrinsic"); case Type::FloatTyID: - ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(), Type::getFloatTy(CI->getContext())); break; case Type::DoubleTyID: - ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(), Type::getDoubleTy(CI->getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(), - CI->getOperand(1)->getType()); + ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(), + CI->getArgOperand(0)->getType()); break; } } @@ -340,6 +342,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { const Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); + CallSite CS(CI); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ @@ -353,7 +356,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { // by the lowerinvoke pass. In both cases, the right thing to do is to // convert the call to an explicit setjmp or longjmp call. 
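+ // Note: CS.arg_begin()/CS.arg_end() visit only the actual call arguments;
+ // the callee operand is excluded, which is what the old CI->op_begin() + 1
+ // idiom was emulating.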
case Intrinsic::setjmp: { - Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), + Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), Type::getInt32Ty(Context)); if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(V); @@ -365,32 +368,32 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; case Intrinsic::longjmp: { - ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), Type::getVoidTy(Context)); break; } case Intrinsic::siglongjmp: { // Insert the call to abort - ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), Type::getVoidTy(Context)); break; } case Intrinsic::ctpop: - CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::bswap: - CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::ctlz: - CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::cttz: { // cttz(x) -> ctpop(~X & (X-1)) - Value *Src = CI->getOperand(1); + Value *Src = CI->getArgOperand(0); Value *NotSrc = Builder.CreateNot(Src); NotSrc->setName(Src->getName() + ".not"); Value *SrcM1 = ConstantInt::get(Src->getType(), 1); @@ -451,37 +454,37 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::memcpy: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); - Ops[1] = CI->getOperand(2); + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); Ops[2] = Size; - ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::memmove: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); - Ops[1] = CI->getOperand(2); + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); Ops[2] = Size; - ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::memset: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); + Ops[0] = CI->getArgOperand(0); // Extend the amount to i32. 
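+ // The intrinsic's fill value is an i8, but the libc memset prototype takes
+ // an int, hence the widening cast.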
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context), + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; - ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::sqrt: { diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index b584704..bf3137e 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -329,12 +329,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); - // Delete dead machine instructions regardless of optimization level. - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass", - /* allowDoubleDefs= */ true); - if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass", + /* allowDoubleDefs= */ true); + PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); @@ -358,7 +361,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, /* allowDoubleDefs= */ true); // Perform register allocation. - PM.add(createRegisterAllocator()); + PM.add(createRegisterAllocator(OptLevel)); printAndVerify(PM, "After Register Allocation"); // Perform stack slot coloring and post-ra machine LICM. diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 03b4eab..b9527fa 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -118,7 +118,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { SUnit *LatencyPriorityQueue::pop() { if (empty()) return NULL; std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 025ad05..21a9b7d 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -68,6 +68,37 @@ bool LiveInterval::liveBeforeAndAt(SlotIndex I) const { return r->end == I; } +/// killedAt - Return true if a live range ends at index. Note that the kill +/// point is not contained in the half-open live range. It is usually the +/// getDefIndex() slot following its last use. +bool LiveInterval::killedAt(SlotIndex I) const { + Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I); + + // Now r points to the first interval with start >= I, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= I. + // r->end is the kill point. + return r->end == I; +} + +/// killedInRange - Return true if the interval has kills in [Start,End). 
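+/// As with killedAt, a kill point is the end of a live range, so this looks
+/// for range ends falling inside the half-open window.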
+bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { + Ranges::const_iterator r = + std::lower_bound(ranges.begin(), ranges.end(), End); + + // Now r points to the first interval with start >= End, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= End. + // r->end is the kill point. + return r->end >= Start && r->end < End; +} + // overlaps - Return true if the intersection of the two live intervals is // not empty. // @@ -149,7 +180,6 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; - SlotIndex OldEnd = I->end; // Search for the first interval that we can't merge with. Ranges::iterator MergeTo = next(I); @@ -163,9 +193,6 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { // Erase any dead ranges. ranges.erase(next(I), MergeTo); - // Update kill info. - ValNo->removeKills(OldEnd, I->end.getPrevSlot()); - // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. Ranges::iterator Next = next(I); @@ -245,9 +272,6 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { // endpoint as well. if (End > it->end) extendIntervalEndTo(it, End); - else if (End < it->end) - // Overlapping intervals, there might have been a kill here. - it->valno->removeKill(End); return it; } } else { @@ -288,7 +312,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { - ValNo->removeKills(Start, End); if (RemoveDeadValNo) { // Check if val# is dead. bool isDead = true; @@ -296,7 +319,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, if (II != I && II->valno == ValNo) { isDead = false; break; - } + } if (isDead) { // Now that ValNo is dead, remove it. If it is the largest value // number, just nuke it (and any other deleted values neighboring it), @@ -320,7 +343,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, // Otherwise if the span we are removing is at the end of the LiveRange, // adjust the other way. if (I->end == End) { - ValNo->removeKills(Start, End); I->end = Start; return; } @@ -529,6 +551,7 @@ void LiveInterval::MergeValueInAsValue( SmallVector<VNInfo*, 4> ReplacedValNos; iterator IP = begin(); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { + assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo"); if (I->valno != RHSValNo) continue; SlotIndex Start = I->start, End = I->end; @@ -823,10 +846,12 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { else { OS << " = "; for (LiveInterval::Ranges::const_iterator I = ranges.begin(), - E = ranges.end(); I != E; ++I) - OS << *I; + E = ranges.end(); I != E; ++I) { + OS << *I; + assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); + } } - + // Print value number info. 
if (getNumValNums()) { OS << " "; @@ -843,21 +868,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << "?"; else OS << vni->def; - unsigned ee = vni->kills.size(); - if (ee || vni->hasPHIKill()) { - OS << "-("; - for (unsigned j = 0; j != ee; ++j) { - OS << vni->kills[j]; - if (j != ee-1) - OS << " "; - } - if (vni->hasPHIKill()) { - if (ee) - OS << " "; - OS << "phi"; - } - OS << ")"; - } } } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index a6d38ad..194d03d 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -50,9 +50,6 @@ using namespace llvm; static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); -static cl::opt<bool> EnableFastSpilling("fast-spill", - cl::init(false), cl::Hidden); - STATISTIC(numIntervals , "Number of original intervals"); STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); @@ -90,8 +87,8 @@ void LiveIntervals::releaseMemory() { r2iMap_.clear(); - // Release VNInfo memroy regions after all VNInfo objects are dtor'd. - VNInfoAllocator.DestroyAll(); + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); while (!CloneMIs.empty()) { MachineInstr *MI = CloneMIs.back(); CloneMIs.pop_back(); @@ -195,6 +192,10 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) if (SrcReg == li.reg || DstReg == li.reg) continue; + if (MI.isCopy()) + if (MI.getOperand(0).getReg() == li.reg || + MI.getOperand(1).getReg() == li.reg) + continue; // Check for operands using reg for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -218,10 +219,7 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, return false; } -/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except -/// it checks for sub-register reference and it can check use as well. 
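+/// conflictsWithAliasRef - Similar to conflictsWithPhysReg, but checks for
+/// references to any physical register overlapping Reg, uses included.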
-bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li, - unsigned Reg, bool CheckUse, +bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, SmallPtrSet<MachineInstr*,32> &JoinedCopies) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { @@ -239,12 +237,11 @@ bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li, MachineOperand& MO = MI->getOperand(i); if (!MO.isReg()) continue; - if (MO.isUse() && !CheckUse) - continue; unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg)) + if (PhysReg == 0 || PhysReg == Reg || + TargetRegisterInfo::isVirtualRegister(PhysReg)) continue; - if (tri_->isSubRegister(Reg, PhysReg)) + if (tri_->regsOverlap(Reg, PhysReg)) return true; } } @@ -272,7 +269,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { if (MO.getReg() == Reg && MO.isDef()) { assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() && MI.getOperand(MOIdx).getSubReg() && - MO.getSubReg()); + (MO.getSubReg() || MO.isImplicit())); return true; } } @@ -328,9 +325,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (mi->isCopyLike() || + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) { CopyMI = mi; + } VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); @@ -356,7 +354,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveRange LR(defIndex, killIdx, ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR << "\n"); - ValNo->addKill(killIdx); return; } } @@ -376,7 +373,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely @@ -407,7 +403,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); - ValNo->addKill(killIdx); DEBUG(dbgs() << " +" << LR); } @@ -434,11 +429,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - // Two-address vregs should always only be redefined once. This means - // that at this point, there should be exactly one value number in it. - assert((PartReDef || interval.containsOneValue()) && - "Unexpected 2-addr liveint!"); - SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex(); SlotIndex RedefIndex = MIIdx.getDefIndex(); if (MO.isEarlyClobber()) RedefIndex = MIIdx.getUseIndex(); @@ -446,8 +436,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex.getUseIndex()); VNInfo *OldValNo = OldLR->valno; + SlotIndex DefIndex = OldValNo->def.getDefIndex(); - // Delete the initial value, which should be short and continuous, + // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. 
interval.removeRange(DefIndex, RedefIndex); @@ -464,15 +455,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ... unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (PartReDef && - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (PartReDef && (mi->isCopyLike() || + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))) OldValNo->setCopy(&*mi); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); DEBUG(dbgs() << " replace range with " << LR); interval.addRange(LR); - ValNo->addKill(RedefIndex); // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. @@ -496,7 +486,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()|| + if (mi->isCopyLike() || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); @@ -504,7 +494,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { @@ -600,7 +589,6 @@ exit: ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); - LR.valno->addKill(end); DEBUG(dbgs() << " +" << LR << '\n'); } @@ -615,7 +603,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, else if (allocatableRegs_[MO.getReg()]) { MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() || + if (MI->isCopyLike() || tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, @@ -701,7 +689,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, LiveRange LR(start, end, vni); interval.addRange(LR); - LR.valno->addKill(end); DEBUG(dbgs() << " +" << LR << '\n'); } @@ -787,37 +774,6 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { return NewLI; } -/// getVNInfoSourceReg - Helper function that parses the specified VNInfo -/// copy field and returns the source register that defines it. -unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { - if (!VNI->getCopy()) - return 0; - - if (VNI->getCopy()->isExtractSubreg()) { - // If it's extracting out of a physical register, return the sub-register. - unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm(); - unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg(); - if (SrcSubReg == DstSubReg) - // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3 - // reg1034 can still be coalesced to EDX. 
- return Reg; - assert(DstSubReg == 0); - Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); - } - return Reg; - } else if (VNI->getCopy()->isInsertSubreg() || - VNI->getCopy()->isSubregToReg()) - return VNI->getCopy()->getOperand(2).getReg(); - - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg)) - return SrcReg; - llvm_unreachable("Unrecognized copy instruction!"); - return 0; -} - //===----------------------------------------------------------------------===// // Register allocator hooks. // @@ -991,22 +947,22 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, if (DefMI && (MRInfo & VirtRegMap::isMod)) return false; - MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot) - : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI); + MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot) + : tii_->foldMemoryOperand(MI, FoldOps, DefMI); if (fmi) { // Remember this instruction uses the spill slot. if (isSS) vrm.addSpillSlotUse(Slot, fmi); // Attempt to fold the memory reference into the instruction. If // we can do this, we don't need to insert spill code. - MachineBasicBlock &MBB = *MI->getParent(); if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot)) vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo); vrm.transferSpillPts(MI, fmi); vrm.transferRestorePts(MI, fmi); vrm.transferEmergencySpills(MI, fmi); ReplaceMachineInstrInMaps(MI, fmi); - MI = MBB.insert(MBB.erase(MI), fmi); + MI->eraseFromParent(); + MI = fmi; ++numFolds; return true; } @@ -1098,7 +1054,6 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (!mop.isReg()) continue; unsigned Reg = mop.getReg(); - unsigned RegI = Reg; if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (Reg != li.reg) @@ -1140,26 +1095,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // // Keep track of whether we replace a use and/or def so that we can // create the spill interval with the appropriate range. - - HasUse = mop.isUse(); - HasDef = mop.isDef(); SmallVector<unsigned, 2> Ops; - Ops.push_back(i); - for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) { - const MachineOperand &MOj = MI->getOperand(j); - if (!MOj.isReg()) - continue; - unsigned RegJ = MOj.getReg(); - if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ)) - continue; - if (RegJ == RegI) { - Ops.push_back(j); - if (!MOj.isUndef()) { - HasUse |= MOj.isUse(); - HasDef |= MOj.isDef(); - } - } - } + tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); // Create a new virtual register for the spill interval. 
 // Create the new register now so we can map the fold instruction
@@ -1294,16 +1231,7 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
 const VNInfo *VNI,
 MachineBasicBlock *MBB,
 SlotIndex Idx) const {
- SlotIndex End = getMBBEndIdx(MBB);
- for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
- if (VNI->kills[j].isPHI())
- continue;
-
- SlotIndex KillIdx = VNI->kills[j];
- if (KillIdx > Idx && KillIdx <= End)
- return true;
- }
- return false;
+ return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
 }
 
 /// RewriteInfo - Keep track of machine instrs that will be rewritten
@@ -1312,10 +1240,7 @@ namespace {
 struct RewriteInfo {
 SlotIndex Index;
 MachineInstr *MI;
- bool HasUse;
- bool HasDef;
- RewriteInfo(SlotIndex i, MachineInstr *mi, bool u, bool d)
- : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+ RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
 };
 
 struct RewriteInfoCompare {
@@ -1394,7 +1319,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
 // easily see a situation where both registers are reloaded before
 // the INSERT_SUBREG and both target registers that would overlap.
 continue;
- RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+ RewriteMIs.push_back(RewriteInfo(index, MI));
 }
 std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
 
@@ -1404,18 +1329,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
 RewriteInfo &rwi = RewriteMIs[i];
 ++i;
 SlotIndex index = rwi.Index;
- bool MIHasUse = rwi.HasUse;
- bool MIHasDef = rwi.HasDef;
 MachineInstr *MI = rwi.MI;
 // If MI defines and/or uses the same register multiple times, then there
 // are multiple entries.
- unsigned NumUses = MIHasUse;
 while (i != e && RewriteMIs[i].MI == MI) {
 assert(RewriteMIs[i].Index == index);
- bool isUse = RewriteMIs[i].HasUse;
- if (isUse) ++NumUses;
- MIHasUse |= isUse;
- MIHasDef |= RewriteMIs[i].HasDef;
 ++i;
 }
 MachineBasicBlock *MBB = MI->getParent();
@@ -1440,7 +1358,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
 // = use
 // It's better to start a new interval to avoid artificially
 // extending the new interval.
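+ // readsWritesVirtualRegister returns a (reads, writes) pair, so this
+ // matches instructions that only define li.reg without reading it.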
- if (MIHasDef && !MIHasUse) { + if (MI->readsWritesVirtualRegister(li.reg) == + std::make_pair(false,true)) { MBBVRegsMap.erase(MBB->getNumber()); ThisVReg = 0; } @@ -1652,103 +1571,9 @@ LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { } std::vector<LiveInterval*> LiveIntervals:: -addIntervalsForSpillsFast(const LiveInterval &li, - const MachineLoopInfo *loopInfo, - VirtRegMap &vrm) { - unsigned slot = vrm.assignVirt2StackSlot(li.reg); - - std::vector<LiveInterval*> added; - - assert(li.isSpillable() && "attempt to spill already spilled interval!"); - - DEBUG({ - dbgs() << "\t\t\t\tadding intervals for spills for interval: "; - li.dump(); - dbgs() << '\n'; - }); - - const TargetRegisterClass* rc = mri_->getRegClass(li.reg); - - MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg); - while (RI != mri_->reg_end()) { - MachineInstr* MI = &*RI; - - SmallVector<unsigned, 2> Indices; - bool HasUse = false; - bool HasDef = false; - - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg() || mop.getReg() != li.reg) continue; - - HasUse |= MI->getOperand(i).isUse(); - HasDef |= MI->getOperand(i).isDef(); - - Indices.push_back(i); - } - - if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI), - Indices, true, slot, li.reg)) { - unsigned NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - vrm.assignVirt2StackSlot(NewVReg, slot); - - // create a new register for this spill - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.markNotSpillable(); - - // Rewrite register operands to use the new vreg. - for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(), - E = Indices.end(); I != E; ++I) { - MI->getOperand(*I).setReg(NewVReg); - - if (MI->getOperand(*I).isUse()) - MI->getOperand(*I).setIsKill(true); - } - - // Fill in the new live interval. 
- SlotIndex index = getInstructionIndex(MI); - if (HasUse) { - LiveRange LR(index.getLoadIndex(), index.getUseIndex(), - nI.getNextValue(SlotIndex(), 0, false, - getVNInfoAllocator())); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - vrm.addRestorePoint(NewVReg, MI); - } - if (HasDef) { - LiveRange LR(index.getDefIndex(), index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, false, - getVNInfoAllocator())); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - vrm.addSpillPoint(NewVReg, true, MI); - } - - added.push_back(&nI); - - DEBUG({ - dbgs() << "\t\t\t\tadded new interval: "; - nI.dump(); - dbgs() << '\n'; - }); - } - - - RI = mri_->reg_begin(li.reg); - } - - return added; -} - -std::vector<LiveInterval*> LiveIntervals:: addIntervalsForSpills(const LiveInterval &li, SmallVectorImpl<LiveInterval*> &SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { - - if (EnableFastSpilling) - return addIntervalsForSpillsFast(li, loopInfo, vrm); - assert(li.isSpillable() && "attempt to spill already spilled interval!"); DEBUG({ @@ -2184,7 +2009,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, SlotIndex(getInstructionIndex(startInst).getDefIndex()), startInst, true, getVNInfoAllocator()); VN->setHasPHIKill(true); - VN->kills.push_back(indexes_->getTerminatorGap(startInst->getParent())); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getDefIndex()), getMBBEndIdx(startInst->getParent()), VN); diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 798b9b9..709e2c6 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -35,8 +35,8 @@ void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { } void LiveStacks::releaseMemory() { - // Release VNInfo memroy regions after all VNInfo objects are dtor'd. - VNInfoAllocator.DestroyAll(); + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); S2IMap.clear(); S2RCMap.clear(); } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 079684e..41b891d 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -286,7 +286,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastDef = PhysRegDef[Reg]; MachineInstr *LastUse = PhysRegUse[Reg]; if (!LastDef && !LastUse) - return false; + return 0; MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; @@ -609,7 +609,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Finally, if the last instruction in the block is a return, make sure to // mark it as using all of the live-out values in the function. - if (!MBB->empty() && MBB->back().getDesc().isReturn()) { + // Things marked both call and return are tail calls; do not do this for + // them. The tail callee need not take the same registers as input + // that it produces as output, and there are dependencies for its input + // registers elsewhere. 
+ if (!MBB->empty() && MBB->back().getDesc().isReturn() + && !MBB->back().getDesc().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index b0348a5..dfd4eae 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -53,15 +53,15 @@ namespace { bool runOnMachineFunction(MachineFunction&); private: - bool LowerExtract(MachineInstr *MI); - bool LowerInsert(MachineInstr *MI); bool LowerSubregToReg(MachineInstr *MI); + bool LowerCopy(MachineInstr *MI); void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, const TargetRegisterInfo *TRI); void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, const TargetRegisterInfo *TRI, bool AddIfNotFound = false); + void TransferImplicitDefs(MachineInstr *MI); }; char LowerSubregsInstructionPass::ID = 0; @@ -83,7 +83,7 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, if (MII->addRegisterDead(DstReg, TRI)) break; assert(MII != MI->getParent()->begin() && - "copyRegToReg output doesn't reference destination register!"); + "copyPhysReg output doesn't reference destination register!"); } } @@ -100,64 +100,24 @@ LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound)) break; assert(MII != MI->getParent()->begin() && - "copyRegToReg output doesn't reference source register!"); + "copyPhysReg output doesn't reference source register!"); } } -bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { - MachineBasicBlock *MBB = MI->getParent(); - - assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - MI->getOperand(1).isReg() && MI->getOperand(1).isUse() && - MI->getOperand(2).isImm() && "Malformed extract_subreg"); - - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SuperReg = MI->getOperand(1).getReg(); - unsigned SubIdx = MI->getOperand(2).getImm(); - unsigned SrcReg = TRI->getSubReg(SuperReg, SubIdx); - - assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) && - "Extract supperg source must be a physical register"); - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && - "Extract destination must be in a physical register"); - assert(SrcReg && "invalid subregister index for register"); - - DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); - - if (SrcReg == DstReg) { - // No need to insert an identity copy instruction. - if (MI->getOperand(1).isKill()) { - // We must make sure the super-register gets killed. Replace the - // instruction with KILL. - MI->setDesc(TII->get(TargetOpcode::KILL)); - MI->RemoveOperand(2); // SubIdx - DEBUG(dbgs() << "subreg: replace by: " << *MI); - return true; - } - - DEBUG(dbgs() << "subreg: eliminated!"); - } else { - // Insert copy - const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg); - const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg); - bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Subreg and Dst must be of compatible register class"); - // Transfer the kill/dead flags, if needed. - if (MI->getOperand(0).isDead()) - TransferDeadFlag(MI, DstReg, TRI); - if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, SuperReg, TRI, true); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); +/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered +/// replacement instructions immediately precede it. 
Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
 }
-
- DEBUG(dbgs() << '\n');
- MBB->erase(MI);
- return true;
 }
 
 bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
@@ -166,10 +126,10 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
 MI->getOperand(1).isImm() &&
 (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
 MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
-
+
 unsigned DstReg = MI->getOperand(0).getReg();
 unsigned InsReg = MI->getOperand(2).getReg();
- unsigned InsSIdx = MI->getOperand(2).getSubReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
 unsigned SubIdx = MI->getOperand(3).getImm();
 
 assert(SubIdx != 0 && "Invalid index for insert_subreg");
@@ -182,27 +142,25 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
 DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
- if (DstSubReg == InsReg && InsSIdx == 0) {
+ if (DstSubReg == InsReg) {
 // No need to insert an identity copy instruction.
 // Watch out for a case like this:
- // %RAX<def> = ...
- // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
- // The first def is defining RAX, not EAX so the top bits were not
- // zero extended.
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
 DEBUG(dbgs() << "subreg: eliminated!");
 } else {
- // Insert sub-register copy
- const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
- const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Subreg and Dst must be of compatible register class");
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
 // Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - if (MI->getOperand(2).isKill()) - TransferKillFlag(MI, InsReg, TRI); DEBUG({ MachineBasicBlock::iterator dMI = MI; dbgs() << "subreg: " << *(--dMI); @@ -214,87 +172,39 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { return true; } -bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { - MachineBasicBlock *MBB = MI->getParent(); - assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) && - (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) && - (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && - MI->getOperand(3).isImm() && "Invalid insert_subreg"); - - unsigned DstReg = MI->getOperand(0).getReg(); -#ifndef NDEBUG - unsigned SrcReg = MI->getOperand(1).getReg(); -#endif - unsigned InsReg = MI->getOperand(2).getReg(); - unsigned SubIdx = MI->getOperand(3).getImm(); +bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) { + MachineOperand &DstMO = MI->getOperand(0); + MachineOperand &SrcMO = MI->getOperand(1); - assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?"); - assert(SubIdx != 0 && "Invalid index for insert_subreg"); - unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx); - assert(DstSubReg && "invalid subregister index for register"); - assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && - "Insert superreg source must be in a physical register"); - assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && - "Inserted value must be in a physical register"); - - DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); - - if (DstSubReg == InsReg) { - // No need to insert an identity copy instruction. If the SrcReg was - // <undef>, we need to make sure it is alive by inserting a KILL - if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { - MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::KILL), DstReg); - if (MI->getOperand(2).isUndef()) - MIB.addReg(InsReg, RegState::Undef); - else - MIB.addReg(InsReg, RegState::Kill); - } else { - DEBUG(dbgs() << "subreg: eliminated!\n"); - MBB->erase(MI); + if (SrcMO.getReg() == DstMO.getReg()) { + DEBUG(dbgs() << "identity copy: " << *MI); + // No need to insert an identity copy instruction, but replace with a KILL + // if liveness is changed. + if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) { + // We must make sure the super-register gets killed. Replace the + // instruction with KILL. + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); return true; } - } else { - // Insert sub-register copy - const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg); - const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg); - if (MI->getOperand(2).isUndef()) - // If the source register being inserted is undef, then this becomes a - // KILL. - BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::KILL), DstSubReg); - else { - bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Subreg and Dst must be of compatible register class"); - } - MachineBasicBlock::iterator CopyMI = MI; - --CopyMI; - - // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg. - if (!MI->getOperand(1).isUndef()) - CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true)); - - // Transfer the kill/dead flags, if needed. 
- if (MI->getOperand(0).isDead()) { - TransferDeadFlag(MI, DstSubReg, TRI); - } else { - // Make sure the full DstReg is live after this replacement. - CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true)); - } - - // Make sure the inserted register gets killed - if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef()) - TransferKillFlag(MI, InsReg, TRI); + // Vanilla identity copy. + MI->eraseFromParent(); + return true; } - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI) << "\n"; - }); + DEBUG(dbgs() << "real copy: " << *MI); + TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), + DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); - MBB->erase(MI); + if (DstMO.isDead()) + TransferDeadFlag(MI, DstMO.getReg(), TRI); + if (MI->getNumOperands() > 2) + TransferImplicitDefs(MI); + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + dbgs() << "replaced by: " << *(--dMI); + }); + MI->eraseFromParent(); return true; } @@ -317,12 +227,13 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi != me;) { MachineBasicBlock::iterator nmi = llvm::next(mi); MachineInstr *MI = mi; - if (MI->isExtractSubreg()) { - MadeChange |= LowerExtract(MI); - } else if (MI->isInsertSubreg()) { - MadeChange |= LowerInsert(MI); - } else if (MI->isSubregToReg()) { + assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear"); + assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG && + "EXTRACT_SUBREG should no longer appear"); + if (MI->isSubregToReg()) { MadeChange |= LowerSubregToReg(MI); + } else if (MI->isCopy()) { + MadeChange |= LowerCopy(MI); } mi = nmi; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index eaaa1f8..a27ee47 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -13,7 +13,10 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/BasicBlock.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -136,6 +139,13 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { Parent->getParent()->DeleteMachineInstr(MI); } +MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { + iterator I = begin(); + while (I != end() && I->isPHI()) + ++I; + return I; +} + MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator I = end(); while (I != begin() && (--I)->getDesc().isTerminator()) @@ -245,6 +255,7 @@ void MachineBasicBlock::updateTerminator() { MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; + DebugLoc dl; // FIXME: this is nowhere bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); (void) B; assert(!B && "UpdateTerminators requires analyzable predecessors!"); @@ -259,7 +270,7 @@ void MachineBasicBlock::updateTerminator() { // its layout successor, insert a branch. 
TBB = *succ_begin(); if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, 0, Cond); + TII->InsertBranch(*this, TBB, 0, Cond, dl); } } else { if (FBB) { @@ -270,10 +281,10 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) return; TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, 0, Cond); + TII->InsertBranch(*this, FBB, 0, Cond, dl); } else if (isLayoutSuccessor(FBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, 0, Cond); + TII->InsertBranch(*this, TBB, 0, Cond, dl); } } else { // The block has a fallthrough conditional branch. @@ -284,14 +295,14 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, MBBA, 0, Cond); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, MBBA, 0, Cond); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); } else if (!isLayoutSuccessor(MBBA)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, MBBA, Cond); + TII->InsertBranch(*this, TBB, MBBA, Cond, dl); } } } @@ -331,12 +342,32 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(), - E = fromMBB->succ_end(); I != E; ++I) - addSuccessor(*I); + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + } +} + +void +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { + if (this == fromMBB) + return; - while (!fromMBB->succ_empty()) - fromMBB->removeSuccessor(fromMBB->succ_begin()); + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + + // Fix up any PHI nodes in the successor. + for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end(); + MI != ME && MI->isPHI(); ++MI) + for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { + MachineOperand &MO = MI->getOperand(i); + if (MO.getMBB() == fromMBB) + MO.setMBB(this); + } + } } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { @@ -395,6 +426,82 @@ bool MachineBasicBlock::canFallThrough() { return FBB == 0; } +MachineBasicBlock * +MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { + MachineFunction *MF = getParent(); + DebugLoc dl; // FIXME: this is nowhere + + // We may need to update this's terminator, but we can't do that if AnalyzeBranch + // fails. If this uses a jump table, we won't touch it. + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) + return NULL; + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); + DEBUG(dbgs() << "PHIElimination splitting critical edge:" + " BB#" << getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << Succ->getNumber() << '\n'); + + ReplaceUsesOfBlockWith(Succ, NMBB); + updateTerminator(); + + // Insert unconditional "jump Succ" instruction in NMBB if necessary. 
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ if (LiveVariables *LV =
+ P->getAnalysisIfAvailable<LiveVariables>())
+ LV->addNewBlock(NMBB, this, Succ);
+
+ if (MachineDominatorTree *MDT =
+ P->getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->addNewBlock(NMBB, this);
+
+ if (MachineLoopInfo *MLI =
+ P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other block is not in a loop, the new block is not in
+ // a loop either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both are in the same loop, so NMBB joins that loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
+
 /// removeFromParent - This method unlinks 'this' from the containing function,
 /// and returns it, but does not delete it.
 MachineBasicBlock *MachineBasicBlock::removeFromParent() {
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6f4f7a8..833cc00 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -30,9 +30,7 @@ using namespace llvm;
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumCSEs, "Number of common subexpressions eliminated");
-
-static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs",
- cl::init(false), cl::Hidden);
+STATISTIC(NumPhysCSEs, "Number of physreg-defining common subexprs eliminated");
 
 namespace {
 class MachineCSE : public MachineFunctionPass {
@@ -128,6 +126,28 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
 ++NumCoalesces;
 Changed = true;
 }
+
+ if (!DefMI->isCopy())
+ continue;
+ SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ continue;
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
+ if (!NewRC)
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ if (NewRC != SRC)
+ MRI->setRegClass(SrcReg, NewRC);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
 }
 
 return Changed;
@@ -172,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
 /// hasLivePhysRegDefUse - Return true if the specified instruction reads / writes
 /// physical registers (except for dead defs of physical registers). It also
-/// returns the physical register def by reference if it's the only one.
+/// returns the physical register def by reference if it's the only one and the
+/// instruction does not use a physical register.
 bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
 const MachineBasicBlock *MBB,
 unsigned &PhysDef) const {
@@ -186,9 +207,11 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
 continue;
 if (TargetRegisterInfo::isVirtualRegister(Reg))
 continue;
- if (MO.isUse())
+ if (MO.isUse()) {
 // Can't touch anything that reads a physical register.
+ PhysDef = 0;
 return true;
+ }
 if (MO.isDead())
 // If the def is dead, it's ok.
 continue;
@@ -240,8 +263,8 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
 
 static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
 unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
- MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg();
+ return MI->isCopyLike() ||
+ TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
 }
 
 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
@@ -356,6 +379,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
 if (!isCSECandidate(MI))
 continue;
 
+ bool DefPhys = false;
 bool FoundCSE = VNT.count(MI);
 if (!FoundCSE) {
 // Look for trivial copy coalescing opportunities.
@@ -376,11 +400,13 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
 // ... Unless the CS is local and it also defines the physical register
 // which is not clobbered in between.
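+ // PhysRegDefReaches checks that the candidate's def of PhysDef reaches MI
+ // with no intervening clobber of the physical register.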
- if (PhysDef && CSEPhysDef) { + if (PhysDef) { unsigned CSVN = VNT.lookup(MI); MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefReaches(CSMI, MI, PhysDef)) + if (PhysRegDefReaches(CSMI, MI, PhysDef)) { FoundCSE = true; + DefPhys = true; + } } } @@ -426,6 +452,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { } MI->eraseFromParent(); ++NumCSEs; + if (DefPhys) + ++NumPhysCSEs; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 4088739..b5f8fbb 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -46,7 +46,6 @@ MachineDominatorTree::MachineDominatorTree() } MachineDominatorTree::~MachineDominatorTree() { - DT->releaseMemory(); delete DT; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index a38c881..666120f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -378,7 +378,7 @@ void MachineFunction::viewCFG() const #ifndef NDEBUG ViewGraph(this, "mf" + getFunction()->getNameStr()); #else - errs() << "SelectionDAG::viewGraph is only available in debug builds on " + errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } @@ -388,7 +388,7 @@ void MachineFunction::viewCFGOnly() const #ifndef NDEBUG ViewGraph(this, "mf" + getFunction()->getNameStr(), true); #else - errs() << "SelectionDAG::viewGraph is only available in debug builds on " + errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } @@ -438,10 +438,16 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// index with a negative value. /// int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isSS) { + bool Immutable) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); - Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable, - isSS)); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. 
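+ // For example, MinAlign(20, 16) == 4: an object 20 bytes into a 16-byte
+ // aligned frame can only be assumed to be 4-byte aligned.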
+ unsigned StackAlign = TFI.getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/false)); return -++NumFixedObjects; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e54cd5c..6b2e985 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -111,6 +111,26 @@ void MachineOperand::setReg(unsigned Reg) { Contents.Reg.RegNo = Reg; } +void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (SubIdx && getSubReg()) + SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); + setReg(Reg); + if (SubIdx) + setSubReg(SubIdx); +} + +void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (getSubReg()) { + Reg = TRI.getSubReg(Reg, getSubReg()); + assert(Reg && "Invalid SubReg for physical register"); + setSubReg(0); + } + setReg(Reg); +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. @@ -861,14 +881,14 @@ int MachineInstr::findFirstPredOperandIdx() const { bool MachineInstr:: isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { if (isInlineAsm()) { - assert(DefOpIdx >= 2); + assert(DefOpIdx >= 3); const MachineOperand &MO = getOperand(DefOpIdx); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) return false; // Determine the actual operand index that corresponds to this index. unsigned DefNo = 0; unsigned DefPart = 0; - for (unsigned i = 1, e = getNumOperands(); i < e; ) { + for (unsigned i = 2, e = getNumOperands(); i < e; ) { const MachineOperand &FMO = getOperand(i); // After the normal asm operands there may be additional imp-def regs. if (!FMO.isImm()) @@ -883,7 +903,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { } ++DefNo; } - for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { + for (unsigned i = 2, e = getNumOperands(); i != e; ++i) { const MachineOperand &FMO = getOperand(i); if (!FMO.isImm()) continue; @@ -926,7 +946,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { // Find the flag operand corresponding to UseOpIdx unsigned FlagIdx, NumOps=0; - for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { const MachineOperand &UFMO = getOperand(FlagIdx); // After the normal asm operands there may be additional imp-def regs. if (!UFMO.isImm()) @@ -944,9 +964,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { if (!DefOpIdx) return true; - unsigned DefIdx = 1; - // Remember to adjust the index. First operand is asm string, then there - // is a flag for each. + unsigned DefIdx = 2; + // Remember to adjust the index. First operand is asm string, second is + // the AlignStack bit, then there is a flag for each. 
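// A sketch of the operand-group walk performed here, under the layout
// just described. The flag decoding below is an assumption for
// illustration only; the real encoding lives in the inline asm flag word.
static unsigned skipAsmGroupsSketch(const MachineInstr &MI, unsigned Groups) {
  unsigned Idx = 2; // operand 0 is the asm string, operand 1 the AlignStack bit
  while (Groups--) {
    // Hypothetical decode: the flag immediate carries the group's size.
    unsigned NumOps = MI.getOperand(Idx).getImm() >> 3;
    Idx += NumOps + 1; // skip the flag itself plus its register operands
  }
  return Idx; // now at the flag operand of the requested group
}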
while (DefNo) { const MachineOperand &FMO = getOperand(DefIdx); assert(FMO.isImm()); @@ -1017,6 +1037,29 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) { } } +void MachineInstr::substituteRegister(unsigned FromReg, + unsigned ToReg, + unsigned SubIdx, + const TargetRegisterInfo &RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (SubIdx) + ToReg = RegInfo.getSubReg(ToReg, SubIdx); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substPhysReg(ToReg, RegInfo); + } + } else { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substVirtReg(ToReg, SubIdx, RegInfo); + } + } +} + /// isSafeToMove - Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. @@ -1168,6 +1211,28 @@ void MachineInstr::dump() const { dbgs() << " " << *this; } +static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. const MachineFunction *MF = 0; @@ -1240,6 +1305,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "!\"" << MDS->getString() << '\"'; else MO.print(OS, TM); + } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); } else MO.print(OS, TM); } @@ -1265,19 +1332,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; - - // TODO: print InlinedAtLoc information - - DIScope Scope(debugLoc.getScope(MF->getFunction()->getContext())); OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. 
- if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << debugLoc.getLine(); - if (debugLoc.getCol() != 0) - OS << ':' << debugLoc.getCol(); + printDebugLoc(debugLoc, MF, OS); } OS << "\n"; @@ -1418,6 +1474,25 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsImp*/)); } +void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs, + const TargetRegisterInfo &TRI) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + bool Dead = true; + for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(), + E = UsedRegs.end(); I != E; ++I) + if (TRI.regsOverlap(*I, Reg)) { + Dead = false; + break; + } + // If there are no uses, including partial uses, the def is dead. + if (Dead) MO.setIsDead(); + } +} + unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { unsigned Hash = MI->getOpcode() * 37; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 6120617..956d21c 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -62,6 +62,7 @@ namespace { // State that is updated as we process loops bool Changed; // True if a loop is changed. + bool FirstInLoop; // True if it's the first LICM in the loop. MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. @@ -82,7 +83,6 @@ namespace { const char *getPassName() const { return "Machine Instruction LICM"; } - // FIXME: Loop preheaders? virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineLoopInfo>(); @@ -127,8 +127,8 @@ namespace { void AddToLiveIns(unsigned Reg); /// IsLICMCandidate - Returns true if the instruction may be a suitable - /// candidate for LICM. e.g. If the instruction is a call, then it's obviously - /// not safe to hoist it. + /// candidate for LICM. e.g. If the instruction is a call, then it's + /// obviously not safe to hoist it. bool IsLICMCandidate(MachineInstr &I); /// IsLoopInvariantInst - Returns true if the instruction is loop @@ -181,6 +181,10 @@ namespace { /// current loop preheader that may become duplicates of instructions that /// are hoisted out of the loop. void InitCSEMap(MachineBasicBlock *BB); + + /// getCurPreheader - Get the preheader for the current loop, splitting + /// a critical edge if needed. + MachineBasicBlock *getCurPreheader(); }; } // end anonymous namespace @@ -192,12 +196,17 @@ FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { return new MachineLICM(PreRegAlloc); } -/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most -/// loop that has a preheader. -static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { +/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most +/// loop that has a unique predecessor. +static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { + // Check whether this loop even has a unique predecessor. + if (!CurLoop->getLoopPredecessor()) + return false; + // Ok, now check to see if any of its outer loops do. for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) - if (L->getLoopPreheader()) + if (L->getLoopPredecessor()) return false; + // None of them did, so this is the outermost with a unique predecessor. 
return true; } @@ -207,7 +216,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { else DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); - Changed = false; + Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); @@ -220,23 +229,17 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis<MachineDominatorTree>(); AA = &getAnalysis<AliasAnalysis>(); - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ - CurLoop = *I; + SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); + while (!Worklist.empty()) { + CurLoop = Worklist.pop_back_val(); + CurPreheader = 0; // If this is done before regalloc, only visit outer-most loops with // a unique predecessor. - if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) - continue; - - // Determine the block to which to hoist instructions. If we can't find a - // suitable loop preheader, we can't do any hoisting. - // - // FIXME: We are only hoisting if the basic block coming into this loop - // has only one successor. This isn't the case in general because we haven't - // broken critical edges or added preheaders. - CurPreheader = CurLoop->getLoopPreheader(); - if (!CurPreheader) + if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { + Worklist.append(CurLoop->begin(), CurLoop->end()); continue; + } if (!PreRegAlloc) HoistRegionPostRA(); @@ -244,6 +247,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // CSEMap is initialized for loop header when the first instruction is // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + FirstInLoop = true; HoistRegion(N); CSEMap.clear(); } @@ -436,13 +440,16 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// operands that is safe to hoist, this instruction is called to do the /// dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) return; + // Now move the instructions to the predecessor, inserting it before any // terminator instructions. DEBUG({ dbgs() << "Hoisting " << *MI; - if (CurPreheader->getBasicBlock()) + if (Preheader->getBasicBlock()) dbgs() << " to MachineBasicBlock " - << CurPreheader->getName(); + << Preheader->getName(); if (MI->getParent()->getBasicBlock()) dbgs() << " from MachineBasicBlock " << MI->getParent()->getName(); @@ -451,7 +458,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); - CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI); + Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is @@ -490,26 +497,16 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { /// candidate for LICM. e.g. If the instruction is a call, then it's obviously /// not safe to hoist it. bool MachineLICM::IsLICMCandidate(MachineInstr &I) { + // It is not profitable to hoist implicit defs. FIXME: Why not? What if they + // are an argument to some other otherwise-hoistable instruction? if (I.isImplicitDef()) return false; - - const TargetInstrDesc &TID = I.getDesc(); - // Ignore stuff that we obviously can't hoist. - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) + // Check if it's safe to move the instruction.
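// Roughly, the conservative test being delegated to has this shape (a
// sketch built from the checks being deleted here; the real isSafeToMove
// additionally reasons about loads and memory operands via alias analysis):
static bool roughlySafeToHoistSketch(const MachineInstr &MI) {
  const TargetInstrDesc &TID = MI.getDesc();
  if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
      TID.hasUnmodeledSideEffects())
    return false;        // ordering or side effects could change
  return !TID.mayLoad(); // loads need an aliasing argument to be hoisted
}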
+ bool DontMoveAcrossStore = true; + if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) return false; - - if (TID.mayLoad()) { - // Okay, this instruction does a load. As a refinement, we allow the target - // to decide whether the loaded value is actually a constant. If so, we can - // actually use it as a load. - if (!I.isInvariantLoad(AA)) - // FIXME: we should be able to hoist loads with no other side effects if - // there are no other instructions which can change memory in this loop. - // This is a trivial form of alias analysis. - return false; - } + return true; } @@ -754,6 +751,9 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, /// that are safe to hoist, this instruction is called to do the dirty work. /// void MachineLICM::Hoist(MachineInstr *MI) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) return; + // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. @@ -765,9 +765,9 @@ void MachineLICM::Hoist(MachineInstr *MI) { // terminator instructions. DEBUG({ dbgs() << "Hoisting " << *MI; - if (CurPreheader->getBasicBlock()) + if (Preheader->getBasicBlock()) dbgs() << " to MachineBasicBlock " - << CurPreheader->getName(); + << Preheader->getName(); if (MI->getParent()->getBasicBlock()) dbgs() << " from MachineBasicBlock " << MI->getParent()->getName(); @@ -776,7 +776,10 @@ void MachineLICM::Hoist(MachineInstr *MI) { // If this is the first instruction being hoisted to the preheader, // initialize the CSE map with potential common expressions. - InitCSEMap(CurPreheader); + if (FirstInLoop) { + InitCSEMap(Preheader); + FirstInLoop = false; + } // Look for opportunity to CSE the hoisted instruction. unsigned Opcode = MI->getOpcode(); @@ -784,7 +787,7 @@ void MachineLICM::Hoist(MachineInstr *MI) { CI = CSEMap.find(Opcode); if (!EliminateCSE(MI, CI)) { // Otherwise, splice the instruction to the preheader. - CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); + Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); // Clear the kill flags of any register this instruction defines, // since they may need to be live throughout the entire loop @@ -808,3 +811,30 @@ void MachineLICM::Hoist(MachineInstr *MI) { ++NumHoisted; Changed = true; } + +MachineBasicBlock *MachineLICM::getCurPreheader() { + // Determine the block to which to hoist instructions. If we can't find a + // suitable loop predecessor, we can't do any hoisting. + + // If we've tried to get a preheader and failed, don't try again. 
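// The caching idiom that follows, sketched generically: one pointer
// encodes three states - null (not computed yet), a reserved sentinel
// address (computed, and known to have failed), or a usable block. The
// helper below is illustrative only, not part of the pass.
static MachineBasicBlock *
cachedPreheaderSketch(MachineBasicBlock *&Cache,
                      MachineBasicBlock *(*Compute)()) {
  MachineBasicBlock *Failed = reinterpret_cast<MachineBasicBlock *>(-1);
  if (Cache == Failed)
    return 0;            // a previous attempt failed; don't retry
  if (!Cache) {
    Cache = Compute();   // may legitimately return null
    if (!Cache) {
      Cache = Failed;    // remember the failure for later queries
      return 0;
    }
  }
  return Cache;
}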
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) + return 0; + + if (!CurPreheader) { + CurPreheader = CurLoop->getLoopPreheader(); + if (!CurPreheader) { + MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); + if (!Pred) { + CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); + return 0; + } + + CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); + if (!CurPreheader) { + CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); + return 0; + } + } + } + return CurPreheader; +} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 70bf7e5..5d852f2 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -20,7 +20,7 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1. + RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()]; UsedPhysRegs.resize(TRI.getNumRegs()); // Create the physreg use/def lists. @@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() { "PhysRegUseDefLists has entries after all instructions are deleted"); #endif delete [] PhysRegUseDefLists; + delete [] RegClass2VRegMap; } /// setRegClass - Set the register class of the specified virtual register. @@ -52,7 +53,7 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { // Remove from old register class's vregs list. This may be slow but // fortunately this operation is rarely needed. std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; - std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR); + std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR); VRegs.erase(I); // Add to new register class's vregs list. @@ -174,115 +175,36 @@ unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const { return 0; } -static cl::opt<bool> -SchedLiveInCopies("schedule-livein-copies", cl::Hidden, - cl::desc("Schedule copies of livein registers"), - cl::init(false)); - -/// EmitLiveInCopy - Emit a copy for a live in physical register. If the -/// physical register has only a single copy use, then coalesced the copy -/// if possible. -static void EmitLiveInCopy(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &InsertPos, - unsigned VirtReg, unsigned PhysReg, - const TargetRegisterClass *RC, - DenseMap<MachineInstr*, unsigned> &CopyRegMap, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - unsigned NumUses = 0; - MachineInstr *UseMI = NULL; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), - UE = MRI.use_end(); UI != UE; ++UI) { - UseMI = &*UI; - if (++NumUses > 1) - break; - } - - // If the number of uses is not one, or the use is not a move instruction, - // don't coalesce. Also, only coalesce away a virtual register to virtual - // register copy. - bool Coalesced = false; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (NumUses == 1 && - TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { - VirtReg = DstReg; - Coalesced = true; - } - - // Now find an ideal location to insert the copy. 
- MachineBasicBlock::iterator Pos = InsertPos; - while (Pos != MBB->begin()) { - MachineInstr *PrevMI = prior(Pos); - DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); - // copyRegToReg might emit multiple instructions to do a copy. - unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; - if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) - // This is what the BB looks like right now: - // r1024 = mov r0 - // ... - // r1 = mov r1024 - // - // We want to insert "r1025 = mov r1". Inserting this copy below the - // move to r1024 makes it impossible for that move to be coalesced. - // - // r1025 = mov r1 - // r1024 = mov r0 - // ... - // r1 = mov 1024 - // r2 = mov 1025 - break; // Woot! Found a good location. - --Pos; - } - - bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC, - DebugLoc()); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); - if (Coalesced) { - if (&*InsertPos == UseMI) ++InsertPos; - MBB->erase(UseMI); - } -} - /// EmitLiveInCopies - Emit copies to initialize livein virtual registers /// into the given entry block. void MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII) { - if (SchedLiveInCopies) { - // Emit the copies at a heuristically-determined location in the block. - DenseMap<MachineInstr*, unsigned> CopyRegMap; - MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); - for (MachineRegisterInfo::livein_iterator LI = livein_begin(), - E = livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = getRegClass(LI->second); - EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, - RC, CopyRegMap, *this, TRI, TII); + // Emit the copies into the top of the block. + for (unsigned i = 0, e = LiveIns.size(); i != e; ++i) + if (LiveIns[i].second) { + if (use_empty(LiveIns[i].second)) { + // The livein has no uses. Drop it. + // + // It would be preferable to have isel avoid creating live-in + // records for unused arguments in the first place, but it's + // complicated by the debug info code for arguments. + LiveIns.erase(LiveIns.begin() + i); + --i; --e; + } else { + // Emit a copy. + BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), + TII.get(TargetOpcode::COPY), LiveIns[i].second) + .addReg(LiveIns[i].first); + + // Add the register to the entry block live-in set. + EntryMBB->addLiveIn(LiveIns[i].first); } - } else { - // Emit the copies into the top of the block. - for (MachineRegisterInfo::livein_iterator LI = livein_begin(), - E = livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = getRegClass(LI->second); - bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC, - DebugLoc()); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - } - } - - // Add function live-ins to entry block live-in set. - for (MachineRegisterInfo::livein_iterator I = livein_begin(), - E = livein_end(); I != E; ++I) - EntryMBB->addLiveIn(I->first); + } else { + // Add the register to the entry block live-in set. 
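// For reference, the emission pattern used above, as a minimal sketch: one
// target-independent COPY from the incoming physical register into the
// argument's virtual register at the top of the entry block, using the
// same APIs as the hunk it accompanies.
static void emitLiveInCopySketch(MachineBasicBlock &Entry,
                                 const TargetInstrInfo &TII,
                                 unsigned VirtReg, unsigned PhysReg) {
  BuildMI(Entry, Entry.begin(), DebugLoc(),
          TII.get(TargetOpcode::COPY), VirtReg).addReg(PhysReg);
  Entry.addLiveIn(PhysReg); // the physreg must be live into the block
}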
+ EntryMBB->addLiveIn(LiveIns[i].first); + } } void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1610e6c..61334fc 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This pass moves instructions into successor blocks, when possible, so that +// This pass moves instructions into successor blocks when possible, so that // they aren't executed on paths where their results aren't needed. // // This pass is not intended to be a replacement or a complete alternative @@ -45,9 +45,9 @@ namespace { public: static char ID; // Pass identification MachineSinking() : MachineFunctionPass(&ID) {} - + virtual bool runOnMachineFunction(MachineFunction &MF); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); @@ -63,7 +63,7 @@ namespace { bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const; }; } // end anonymous namespace - + char MachineSinking::ID = 0; static RegisterPass<MachineSinking> X("machine-sink", "Machine code sinking"); @@ -72,7 +72,7 @@ FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. -bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, +bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); @@ -80,27 +80,30 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // This may leave a referencing dbg_value in the original block, before // the definition of the vreg. Dwarf generator handles this although the // user might not get the right info at runtime. - for (MachineRegisterInfo::use_nodbg_iterator I = - RegInfo->use_nodbg_begin(Reg), - E = RegInfo->use_nodbg_end(); I != E; ++I) { + for (MachineRegisterInfo::use_nodbg_iterator + I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end(); + I != E; ++I) { // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); } + // Check that it dominates. if (!DT->dominates(MBB, UseBlock)) return false; } + return true; } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Machine Sinking ********\n"); - + const TargetMachine &TM = MF.getTarget(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); @@ -111,19 +114,19 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { AllocatableSet = TRI->getAllocatableSet(MF); bool EverMadeChange = false; - + while (1) { bool MadeChange = false; // Process all basic blocks. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); - + // If this iteration over the code changed anything, keep iterating. 
if (!MadeChange) break; EverMadeChange = true; - } + } return EverMadeChange; } @@ -132,8 +135,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MBB.succ_size() <= 1 || MBB.empty()) return false; // Don't bother sinking code out of unreachable blocks. In addition to being - // unprofitable, it can also lead to infinite looping, because in an unreachable - // loop there may be nowhere to stop. + // unprofitable, it can also lead to infinite looping, because in an + // unreachable loop there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; @@ -144,7 +147,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { bool ProcessedBegin, SawStore = false; do { MachineInstr *MI = I; // The instruction to sink. - + // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. ProcessedBegin = I == MBB.begin(); @@ -156,10 +159,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; - + // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); - + return MadeChange; } @@ -169,7 +172,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. if (!MI->isSafeToMove(TII, AA, SawStore)) return false; - + // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. We often get // instructions sunk into the top of a large block, but it would be better to @@ -177,22 +180,22 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. - + // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. MachineBasicBlock *ParentBlock = MI->getParent(); - + // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = 0; - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. - + unsigned Reg = MO.getReg(); if (Reg == 0) continue; - + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register @@ -200,13 +203,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // it could get allocated to something with a def during allocation. if (!RegInfo->def_empty(Reg)) return false; + if (AllocatableSet.test(Reg)) return false; + // Check for a def among the register's aliases too. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (!RegInfo->def_empty(AliasReg)) return false; + if (AllocatableSet.test(AliasReg)) return false; } @@ -221,28 +227,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg))) return false; - + // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. However, there are cases where // sinking can happen but where the sink point isn't a successor. 
For // example: + // // x = computation // if () {} else {} // use x - // the instruction could be sunk over the whole diamond for the + // + // the instruction could be sunk over the whole diamond for the // if/then/else (or loop, etc), allowing it to be sunk into other blocks // after that. - + // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) return false; + continue; } - + // Otherwise, we should look at all the successors and decide which one // we should sink to. for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), @@ -252,13 +261,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { break; } } - + // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) return false; } } - + // If there are no outputs, it must have side-effects. if (SuccToSinkTo == 0) return false; @@ -267,15 +276,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // landing pad is implicitly defined. if (SuccToSinkTo->isLandingPad()) return false; - + // It is not possible to sink an instruction into its own block. This can // happen with loops. if (MI->getParent() == SuccToSinkTo) return false; - - DEBUG(dbgs() << "Sink instr " << *MI); - DEBUG(dbgs() << "to block " << *SuccToSinkTo); - + + // If the instruction to move defines a dead physical register which is live + // when leaving the basic block, don't move it because it could turn into a + // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>) + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (SuccToSinkTo->isLiveIn(Reg)) + return false; + } + + DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); + // If the block has multiple predecessors, this would introduce computation on // a path where it doesn't already exist. We could split the critical edge, // but for now we just punt. @@ -305,18 +325,18 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Otherwise we are OK with sinking along a critical edge. DEBUG(dbgs() << "Sinking along critical edge.\n"); } - - // Determine where to insert into. Skip phi nodes. + + // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; - + // Move the instruction. SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); - // Conservatively, clear any kill flags, since it's possible that - // they are no longer correct. + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct.
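// What the clearing below buys, sketched: once the instruction moves, a
// register it uses may stay live past its old kill point, so any stale
// kill markers must be dropped. A hand-rolled equivalent might look like:
static void clearUseKillsSketch(MachineInstr &MI) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg() && MO.isUse() && MO.isKill())
      MO.setIsKill(false); // liveness may now extend beyond this point
  }
}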
MI->clearKillInfo(); return true; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 8baf01c..2297c90 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -390,7 +390,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().getDesc().isBarrier()) { + if (!MBB->empty() && MBB->back().getDesc().isBarrier() && + !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp index 41fc204..dcdc243 100644 --- a/lib/CodeGen/OptimizeExts.cpp +++ b/lib/CodeGen/OptimizeExts.cpp @@ -118,6 +118,26 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; } + // It's an error to translate this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 + // + // into this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1027 = COPY %reg1025:4 + // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 + // + // The problem here is that SUBREG_TO_REG is there to assert that an + // implicit zext occurs. It doesn't insert a zext instruction. If we allow + // the COPY here, it will give us the value after the <sext>, + // not the original value of %reg1024 before <sext>. + if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) + continue; + MachineBasicBlock *UseMBB = UseMI->getParent(); if (UseMBB == MBB) { // Local uses that come after the extension. @@ -165,8 +185,8 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR) - .addReg(DstReg).addImm(SubIdx); + TII->get(TargetOpcode::COPY), NewVR) + .addReg(DstReg, 0, SubIdx); UseMO->setReg(NewVR); ++NumReuse; Changed = true; diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 2717d4d..1613fe2 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -107,6 +107,11 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, SrcSubIdx == 0 && DstSubIdx == 0 && TargetRegisterInfo::isVirtualRegister(MvSrcReg)) SrcMI = MRI->getVRegDef(MvSrcReg); + else if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); if (!SrcMI) return false; diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h index bd18b52..02938df 100644 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -406,7 +406,7 @@ namespace PBQP { // Create node data objects. 
for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { + nItr != nEnd; ++nItr) { nodeDataList.push_back(NodeData()); g.setNodeData(nItr, &nodeDataList.back()); } diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h index 30d34d9..4c1ce11 100644 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H #define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H -#include "llvm/Support/Compiler.h" #include "../HeuristicSolver.h" #include "../HeuristicBase.h" @@ -267,8 +266,8 @@ namespace PBQP { if (!nd.isHeuristic) return; - EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr); - + EdgeData &ed = getHeuristicEdgeData(eItr); + (void)ed; assert(ed.isUpToDate && "Edge data is not up to date."); // Update node. diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index edbc13f..ea6b094 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -34,7 +34,6 @@ using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); -STATISTIC(NumSplits, "Number of critical edges split on demand"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; @@ -184,7 +183,6 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Create a new register for the incoming PHI arguments. MachineFunction &MF = *MBB.getParent(); - const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); unsigned IncomingReg = 0; bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? @@ -208,10 +206,12 @@ void llvm::PHIElimination::LowerAtomicPHINode( ++NumReused; DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); } else { + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } - TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC, - MPhi->getDebugLoc()); + BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), DestReg) + .addReg(IncomingReg); } // Update live variable information if there is any. @@ -293,8 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Insert the copy. if (!reusedIncoming && IncomingReg) - TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC, - MPhi->getDebugLoc()); + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; @@ -391,57 +391,8 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, // (not considering PHI nodes). If the register is live in to this block // anyway, we would gain nothing from splitting. if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) - SplitCriticalEdge(PreMBB, &MBB); + PreMBB->SplitCriticalEdge(&MBB, this); } } return true; } - -MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, - MachineBasicBlock *B) { - assert(A && B && "Missing MBB end point"); - - MachineFunction *MF = A->getParent(); - - // We may need to update A's terminator, but we can't do that if AnalyzeBranch - // fails. If A uses a jump table, we won't touch it. 
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*A, TBB, FBB, Cond)) - return NULL; - - ++NumSplits; - - MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB); - DEBUG(dbgs() << "PHIElimination splitting critical edge:" - " BB#" << A->getNumber() - << " -- BB#" << NMBB->getNumber() - << " -- BB#" << B->getNumber() << '\n'); - - A->ReplaceUsesOfBlockWith(B, NMBB); - A->updateTerminator(); - - // Insert unconditional "jump B" instruction in NMBB if necessary. - NMBB->addSuccessor(B); - if (!NMBB->isLayoutSuccessor(B)) { - Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond); - } - - // Fix PHI nodes in B so they refer to NMBB instead of A - for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); - i != e && i->isPHI(); ++i) - for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) - if (i->getOperand(ni+1).getMBB() == A) - i->getOperand(ni+1).setMBB(NMBB); - - if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>()) - LV->addNewBlock(NMBB, A, B); - - if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>()) - MDT->addNewBlock(NMBB, A); - - return NMBB; -} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 5ea2941..3489db2 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -24,6 +24,11 @@ using namespace llvm; //===---------------------------------------------------------------------===// MachinePassRegistry RegisterRegAlloc::Registry; +static FunctionPass *createDefaultRegisterAllocator() { return 0; } +static RegisterRegAlloc +defaultRegAlloc("default", + "pick register allocator based on -O option", + createDefaultRegisterAllocator); //===---------------------------------------------------------------------===// /// @@ -33,8 +38,8 @@ MachinePassRegistry RegisterRegAlloc::Registry; static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc> > RegAlloc("regalloc", - cl::init(&createLinearScanRegisterAllocator), - cl::desc("Register allocator to use (default=linearscan)")); + cl::init(&createDefaultRegisterAllocator), + cl::desc("Register allocator to use")); //===---------------------------------------------------------------------===// @@ -42,13 +47,22 @@ RegAlloc("regalloc", /// createRegisterAllocator - choose the appropriate register allocator. /// //===---------------------------------------------------------------------===// -FunctionPass *llvm::createRegisterAllocator() { +FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); - + if (!Ctor) { Ctor = RegAlloc; RegisterRegAlloc::setDefault(RegAlloc); } - - return Ctor(); + + if (Ctor != createDefaultRegisterAllocator) + return Ctor(); + + // When the 'default' allocator is requested, pick one based on OptLevel. 
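// For context before the OptLevel dispatch below: a new allocator would be
// exposed through the same registry as the "default" entry above. The name
// and factory here are hypothetical, for illustration only.
static FunctionPass *createSketchRegisterAllocator() {
  return createFastRegisterAllocator(); // stand-in body
}
static RegisterRegAlloc
sketchRegAlloc("sketch", "illustrative register allocator registration",
               createSketchRegisterAllocator);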
+ switch (OptLevel) { + case CodeGenOpt::None: + return createFastRegisterAllocator(); + default: + return createLinearScanRegisterAllocator(); + } } diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index af5f289..cbde2b0 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -1,4 +1,4 @@ -//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===// +//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===// // // The LLVM Compiler Infrastructure // @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "post-RA-sched" -#include "ExactHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/PostRAHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -22,10 +22,9 @@ using namespace llvm; -ExactHazardRecognizer:: -ExactHazardRecognizer(const InstrItineraryData &LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) -{ +PostRAHazardRecognizer:: +PostRAHazardRecognizer(const InstrItineraryData &LItinData) : + ScheduleHazardRecognizer(), ItinData(LItinData) { // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. @@ -48,16 +47,16 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : ReservedScoreboard.reset(ScoreboardDepth); RequiredScoreboard.reset(ScoreboardDepth); - DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " + DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = " << ScoreboardDepth << '\n'); } -void ExactHazardRecognizer::Reset() { +void PostRAHazardRecognizer::Reset() { RequiredScoreboard.reset(); ReservedScoreboard.reset(); } -void ExactHazardRecognizer::ScoreBoard::dump() const { +void PostRAHazardRecognizer::ScoreBoard::dump() const { dbgs() << "Scoreboard:\n"; unsigned last = Depth - 1; @@ -73,7 +72,8 @@ void ExactHazardRecognizer::ScoreBoard::dump() const { } } -ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) { +ScheduleHazardRecognizer::HazardType +PostRAHazardRecognizer::getHazardType(SUnit *SU) { if (ItinData.isEmpty()) return NoHazard; @@ -120,7 +120,7 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU return NoHazard; } -void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { +void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { if (ItinData.isEmpty()) return; @@ -174,7 +174,7 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { DEBUG(RequiredScoreboard.dump()); } -void ExactHazardRecognizer::AdvanceCycle() { +void PostRAHazardRecognizer::AdvanceCycle() { ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 9714ea6..4af8e07 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,8 +22,6 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" -#include "ExactHazardRecognizer.h" -#include "SimpleHazardRecognizer.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include 
"llvm/CodeGen/LatencyPriorityQueue.h" @@ -65,10 +63,6 @@ EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies: " "\"critical\", \"all\", or \"none\""), cl::init("none"), cl::Hidden); -static cl::opt<bool> -EnablePostRAHazardAvoidance("avoid-hazards", - cl::desc("Enable exact hazard avoidance"), - cl::init(true), cl::Hidden); // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod static cl::opt<int> @@ -85,6 +79,7 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; + const TargetInstrInfo *TII; CodeGenOpt::Level OptLevel; public: @@ -187,30 +182,9 @@ namespace { }; } -/// isSchedulingBoundary - Test if the given instruction should be -/// considered a scheduling boundary. This primarily includes labels -/// and terminators. -/// -static bool isSchedulingBoundary(const MachineInstr *MI, - const MachineFunction &MF) { - // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) - return true; - - // Don't attempt to schedule around any instruction that defines - // a stack-oriented pointer, as it's unlikely to be profitable. This - // saves compile time, because it doesn't require every single - // stack slot reference to depend on the instruction that does the - // modification. - const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) - return true; - - return false; -} - bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { AA = &getAnalysis<AliasAnalysis>(); + TII = Fn.getTarget().getInstrInfo(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -237,10 +211,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData(); - ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? - (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : - (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); + const TargetMachine &TM = Fn.getTarget(); + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + ScheduleHazardRecognizer *HR = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); AntiDepBreaker *ADB = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : @@ -271,8 +245,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator Current = MBB->end(); unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { - MachineInstr *MI = prior(I); - if (isSchedulingBoundary(MI, Fn)) { + MachineInstr *MI = llvm::prior(I); + if (TII->isSchedulingBoundary(MI, MBB, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); Scheduler.EmitSchedule(); Current = MI; @@ -680,15 +654,6 @@ void SchedulePostRATDList::ListScheduleTopDown() { ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); CycleHasInsts = true; - - // If we are using the target-specific hazards, then don't - // advance the cycle time just because we schedule a node. 
If - // the target allows it we can schedule multiple nodes in the - // same cycle. - if (!EnablePostRAHazardAvoidance) { - if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; - } } else { if (CycleHasInsts) { DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 96e7327..fb2f909 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -512,9 +512,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI)); // FIXME: Need to set kills properly for inter-block stuff. - if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex); - if (IsIntraBlock) - RetVNI->addKill(EndIndex); } else if (ContainsDefs && ContainsUses) { SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; @@ -556,12 +553,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, NewVNs, LiveOut, Phis, false, true); LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - - if (foundUse && RetVNI->isKill(StartIndex)) - RetVNI->removeKill(StartIndex); - if (IsIntraBlock) { - RetVNI->addKill(EndIndex); - } } // Memoize results so we don't have to recompute them. @@ -636,9 +627,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { I->second->setHasPHIKill(true); - SlotIndex KillIndex(LIs->getMBBEndIdx(I->first), true); - if (!I->second->isKill(KillIndex)) - I->second->addKill(KillIndex); } } @@ -648,8 +636,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us } else EndIndex = LIs->getMBBEndIdx(MBB); LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - if (IsIntraBlock) - RetVNI->addKill(EndIndex); // Memoize results so we don't have to recompute them. if (!IsIntraBlock) @@ -691,10 +677,12 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { // If the def is a move, set the copy field. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { if (DstReg == LI->reg) NewVN->setCopy(&*DI); - + } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) + NewVN->setCopy(&*DI); + NewVNs[&*DI] = NewVN; } @@ -725,25 +713,6 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { VNInfo* DeadVN = NewVNs[&*DI]; LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN)); - DeadVN->addKill(DefIdx); - } - - // Update kill markers. - for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); - VI != VE; ++VI) { - VNInfo* VNI = *VI; - for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) { - SlotIndex KillIdx = VNI->kills[i]; - if (KillIdx.isPHI()) - continue; - MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx); - if (KillMI) { - MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg); - if (KillMO) - // It could be a dead def. 
- KillMO->setIsKill(); - } - } } } @@ -773,19 +742,14 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { VNsToCopy.push_back(OldVN); // Locate two-address redefinitions - for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(), - KE = OldVN->kills.end(); KI != KE; ++KI) { - assert(!KI->isPHI() && - "VN previously reported having no PHI kills."); - MachineInstr* MI = LIs->getInstructionFromIndex(*KI); - unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg); - if (DefIdx == ~0U) continue; - if (MI->isRegTiedToUseOperand(DefIdx)) { - VNInfo* NextVN = - CurrLI->findDefinedVNInfoForRegInt(KI->getDefIndex()); - if (NextVN == OldVN) continue; + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue; + SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex(); + VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx); + if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) != + VNsToCopy.end()) Stack.push_back(NextVN); - } } } @@ -836,7 +800,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { if (IntervalSSMap.count(CurrLI->reg)) IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg]; - NumRenumbers++; + ++NumRenumbers; } bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, @@ -854,7 +818,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI); SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); ReconstructLiveInterval(CurrLI); @@ -899,12 +863,11 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); } - MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), - FoldPt, Ops, SS); + MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); if (FMI) { LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FMI = MBB->insert(MBB->erase(FoldPt), FMI); + FoldPt->eraseFromParent(); ++NumFolds; IntervalSSMap[vreg] = SS; @@ -980,12 +943,11 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, if (!TII->canFoldMemoryOperand(FoldPt, Ops)) return 0; - MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), - FoldPt, Ops, SS); + MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); if (FMI) { LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FMI = MBB->insert(MBB->erase(FoldPt), FMI); + FoldPt->eraseFromParent(); ++NumRestoreFolds; } @@ -1192,7 +1154,7 @@ unsigned PreAllocSplitting::getNumberOfNonSpills( int StoreFrameIndex; unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) - NonSpills++; + ++NonSpills; int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) @@ -1255,7 +1217,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); VNUseCount.erase(CurrVN); - NumDeadSpills++; + ++NumDeadSpills; changed = true; continue; } @@ -1291,9 +1253,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { Ops.push_back(OpIdx); if (!TII->canFoldMemoryOperand(use, Ops)) continue; - MachineInstr* NewMI = - TII->foldMemoryOperand(*use->getParent()->getParent(), - use, Ops, FrameIndex); + MachineInstr* NewMI = 
TII->foldMemoryOperand(use, Ops, FrameIndex); if (!NewMI) continue; @@ -1303,10 +1263,9 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); - MachineBasicBlock* MBB = use->getParent(); - NewMI = MBB->insert(MBB->erase(use), NewMI); + use->eraseFromParent(); VNUseCount[CurrVN].erase(use); - + // Remove deleted instructions. Note that we need to remove them from // the VNInfo->use map as well, just to be safe. for (SmallPtrSet<MachineInstr*, 4>::iterator II = @@ -1328,7 +1287,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { if (VI->second.erase(use)) VI->second.insert(NewMI); - NumDeadSpills++; + ++NumDeadSpills; changed = true; continue; } @@ -1350,7 +1309,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { LIs->RemoveMachineInstrFromMaps(DefMI); (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); - NumDeadSpills++; + ++NumDeadSpills; changed = true; } } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 62f525f..ca4c477 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -46,14 +46,14 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, const TargetInstrInfo *tii_) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) + Reg == SrcReg && DstSubReg == 0) return true; - if (OpIdx == 2 && MI->isSubregToReg()) - return true; - if (OpIdx == 1 && MI->isExtractSubreg()) - return true; - return false; + switch(OpIdx) { + case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0; + case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0; + default: return false; + } } /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure @@ -101,11 +101,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } - if (MI->isInsertSubreg()) { - MachineOperand &MO = MI->getOperand(2); + // Eliminate %reg1032:sub<def> = COPY undef. + if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + MachineOperand &MO = MI->getOperand(1); if (ImpDefRegs.count(MO.getReg())) { - // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2 - // This is an identity copy, eliminate it now. if (MO.isKill()) { LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); vi.removeKill(MI); @@ -119,7 +118,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) + if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -144,6 +143,12 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { Changed = true; MO.setIsUndef(); + // This is a partial register redef of an implicit def. + // Make sure the whole register is defined by the instruction. + if (MO.isDef()) { + MI->addRegisterDefined(Reg); + continue; + } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { // Make sure other uses of for (unsigned j = i+1; j != e; ++j) { @@ -219,8 +224,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Turn a copy use into an implicit_def. 
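// The test used below, restated as a self-contained sketch: a use of Reg
// counts as a plain full-register copy either via the new COPY form or via
// the target's legacy move hook.
static bool isPlainCopyOfSketch(const MachineInstr *MI, unsigned Reg,
                                const TargetInstrInfo *TII) {
  if (MI->isCopy())
    return MI->getOperand(1).getReg() == Reg &&
           MI->getOperand(0).getSubReg() == 0;
  unsigned Src, Dst, SrcSub, DstSub;
  return TII->isMoveInstr(*MI, Src, Dst, SrcSub, DstSub) &&
         Src == Reg && DstSub == 0;
}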
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) { + if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg && + RMI->getOperand(0).getSubReg() == 0) || + (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg && DstSubReg == 0)) { RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e778024..3843b25 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -158,9 +158,9 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { AdjustsStack = true; FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { - // An InlineAsm might be a call; assume it is to get the stack frame - // aligned correctly for calls. - AdjustsStack = true; + // Some inline asm's need a stack frame, as indicated by operand 1. + if (I->getOperand(1).getImm()) + AdjustsStack = true; } MFI->setAdjustsStack(AdjustsStack); @@ -202,22 +202,17 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) return; - // Figure out which *callee saved* registers are modified by the current - // function, thus needing to be saved and restored in the prolog/epilog. - const TargetRegisterClass * const *CSRegClasses = - RegInfo->getCalleeSavedRegClasses(&Fn); - std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); *AliasSet; ++AliasSet) { // Check alias registers too. if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); break; } } @@ -236,7 +231,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = I->getRegClass(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { @@ -265,8 +260,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, - true, false); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); } I->setFrameIdx(FrameIdx); @@ -303,8 +297,10 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true, - CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI); + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); } } @@ -328,9 +324,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // terminators that preceed it. 
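//===-- Editor's aside (illustrative sketch, not part of the patch) ------===//
// CalleeSavedInfo no longer carries a TargetRegisterClass, so each spill and
// restore site below recomputes one from the physical register itself.
// getMinimalPhysRegClass(Reg) returns the smallest register class containing
// Reg, which is always legal for a plain spill or reload of that register:
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(CSI[i].getReg());
assert(RC->contains(CSI[i].getReg()) && "minimal class contains its register");
// (Reuses the surrounding TRI/CSI names; the assert is only a sanity sketch.)
//===----------------------------------------------------------------------===//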
if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), - CSI[i].getRegClass(), TRI); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -374,10 +372,12 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MBB->addLiveIn(blockCSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MBB, I, Reg, true, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass(), TRI); + RC, TRI); } } @@ -423,9 +423,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that preceed it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass(), TRI); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -639,6 +641,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { +#ifndef NDEBUG + int SPAdjCount = 0; // frame setup / destroy count. +#endif int SPAdj = 0; // SP offset due to call frame setup / destroy. if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); @@ -646,6 +651,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { +#ifndef NDEBUG + // Track whether we see even pairs of them + SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; +#endif // Remember how much SP has been adjusted to create the call // frame. int Size = I->getOperand(0).getImm(); @@ -712,7 +721,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } - assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); + // If we have evenly matched pairs of frame setup / destroy instructions, + // make sure the adjustments come out to zero. If we don't have matched + // pairs, we can't be sure the missing bit isn't in another basic block + // due to a custom inserter playing tricks, so just asserting SPAdj==0 + // isn't sufficient. See tMOVCC on Thumb1, for example. + assert((SPAdjCount || SPAdj == 0) && + "Unbalanced call frame setup / destroy pairs?"); } } @@ -870,11 +885,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Scavenge a new scratch register CurrentVirtReg = Reg; const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->FindUnusedReg(RC); - if (CurrentScratchReg == 0) - // No register is "free". Scavenge a register. 
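//===-- Editor's aside (illustrative sketch, not part of the patch) ------===//
// RegScavenger::scavengeRegister already returns a register that is simply
// unused at the insertion point when one exists, and only spills as a last
// resort, so the FindUnusedReg pre-check deleted here was redundant:
unsigned Scratch = RS->scavengeRegister(RC, I, SPAdj); // spills only if needed
//===----------------------------------------------------------------------===//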
- CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); PrevValue = Value; } // replace this reference to the virtual register with the diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index b3b5760..f44478e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -110,6 +110,11 @@ namespace { // Allocatable - vector of allocatable physical registers. BitVector Allocatable; + // SkippedInstrs - Descriptors of instructions whose clobber list was ignored + // because all registers were spilled. It is still necessary to mark all the + // clobbered registers as used by the function. + SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs; + // isBulkSpilling - This flag is set when LiveRegMap will be cleared // completely after spilling all live registers. LiveRegMap entries should // not be erased. @@ -135,6 +140,8 @@ namespace { private: bool runOnMachineFunction(MachineFunction &Fn); void AllocateBasicBlock(); + void handleThroughOperands(MachineInstr *MI, + SmallVectorImpl<unsigned> &VirtDead); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); bool isLastUseOfLocalReg(MachineOperand&); @@ -508,27 +515,20 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, bool New; tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); LiveReg &LR = LRI->second; - bool PartialRedef = MI->getOperand(OpNum).getSubReg(); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { + const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; // It's a copy, use the destination register as a hint. - if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg), - SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (UseMI.isCopyLike()) + Hint = UseMI.getOperand(0).getReg(); + else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) Hint = DstReg; } allocVirtReg(MI, *LRI, Hint); - // If this is only a partial redefinition, we must reload the other parts. - if (PartialRedef && MI->readsVirtualRegister(VirtReg)) { - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - int FI = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI); - ++NumLoads; - } - } else if (LR.LastUse && !PartialRedef) { + } else if (LR.LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) @@ -564,10 +564,16 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, } else if (LR.Dirty) { if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); - MO.setIsKill(); + if (MO.isUse()) + MO.setIsKill(); + else + MO.setIsDead(); } else if (MO.isKill()) { DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); + MO.setIsDead(false); } } else if (MO.isKill()) { // We must remove kill flags from uses of reloaded registers because the @@ -576,6 +582,9 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, // This would cause a second reload of %x into a different register. 
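//===-- Editor's aside (illustrative sketch, not part of the patch) ------===//
// The dead-flag handling added below mirrors the existing kill-flag rule: a
// stale <kill> on a use would free the physreg while a second use of the same
// value in this instruction still needs it, and a stale <dead> on a def would
// discard a value the reload path still tracks. Both clears are guarded,
// since kill flags only appear on uses and dead flags only on defs
// (clearStaleFlags is an assumed helper name, not an LLVM API):
static void clearStaleFlags(MachineOperand &MO) {
  if (MO.isKill()) MO.setIsKill(false); // use no longer ends the live range
  if (MO.isDead()) MO.setIsDead(false); // def's value is still wanted
}
//===----------------------------------------------------------------------===//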
DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); + MO.setIsDead(false); } assert(LR.PhysReg && "Register not assigned"); LR.LastUse = MI; @@ -607,6 +616,91 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { return MO.isDead(); } +// Handle special instruction operand like early clobbers and tied ops when +// there are additional physreg defines. +void RAFast::handleThroughOperands(MachineInstr *MI, + SmallVectorImpl<unsigned> &VirtDead) { + DEBUG(dbgs() << "Scanning for through registers:"); + SmallSet<unsigned, 8> ThroughRegs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || + (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { + if (ThroughRegs.insert(Reg)) + DEBUG(dbgs() << " %reg" << Reg); + } + } + + // If any physreg defines collide with preallocated through registers, + // we must spill and reallocate. + DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + if (ThroughRegs.count(PhysRegState[Reg])) + definePhysReg(MI, Reg, regFree); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + UsedInInstr.set(*AS); + if (ThroughRegs.count(PhysRegState[*AS])) + definePhysReg(MI, *AS, regFree); + } + } + + SmallVector<unsigned, 8> PartialDefs; + DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isUse()) { + unsigned DefIdx = 0; + if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; + DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " + << DefIdx << ".\n"); + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + setPhysReg(MI, i, PhysReg); + // Note: we don't update the def operand yet. That would cause the normal + // def-scan to attempt spilling. + } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) { + DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); + // Reload the register, but don't assign to the operand just yet. + // That would confuse the later phys-def processing pass. + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + PartialDefs.push_back(LRI->second.PhysReg); + } else if (MO.isEarlyClobber()) { + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + } + + // Restore UsedInInstr to a state usable for allocating normal virtual uses. 
+ UsedInInstr.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } + + // Also mark PartialDefs as used to avoid reallocation. + for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) + UsedInInstr.set(PartialDefs[i]); +} + void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); @@ -620,7 +714,7 @@ void RAFast::AllocateBasicBlock() { E = MBB->livein_end(); I != E; ++I) definePhysReg(MII, *I, regReserved); - SmallVector<unsigned, 8> PhysECs, VirtDead; + SmallVector<unsigned, 8> VirtDead; SmallVector<MachineInstr*, 32> Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. @@ -670,8 +764,25 @@ void RAFast::AllocateBasicBlock() { LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); - else - MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + else { + // Modify DBG_VALUE now that the value is in a spill slot. + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + } + } } // Next instruction. continue; @@ -679,17 +790,25 @@ void RAFast::AllocateBasicBlock() { // If this is a copy, we may be able to coalesce. unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub; - if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) + if (MI->isCopy()) { + CopyDst = MI->getOperand(0).getReg(); + CopySrc = MI->getOperand(1).getReg(); + CopyDstSub = MI->getOperand(0).getSubReg(); + CopySrcSub = MI->getOperand(1).getSubReg(); + } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) CopySrc = CopyDst = 0; // Track registers used by instruction. UsedInInstr.reset(); - PhysECs.clear(); // First scan. // Mark physreg uses and early clobbers as used. // Find the end of the virtreg operands unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -697,20 +816,44 @@ void RAFast::AllocateBasicBlock() { if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } continue; } if (!Allocatable.test(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, MO.isDead() ? 
regFree : regReserved); - PhysECs.push_back(Reg); - } + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. + if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && hasPhysDefs)) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDst = 0; } // Second scan. - // Allocate virtreg uses and early clobbers. - // Collect VirtKills + // Allocate virtreg uses. for (unsigned i = 0; i != VirtOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -722,12 +865,6 @@ void RAFast::AllocateBasicBlock() { CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) killVirtReg(LRI); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - setPhysReg(MI, i, PhysReg); - PhysECs.push_back(PhysReg); } } @@ -735,12 +872,16 @@ void RAFast::AllocateBasicBlock() { // Track registers defined by instruction - early clobbers at this point. UsedInInstr.reset(); - for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) { - unsigned PhysReg = PhysECs[i]; - UsedInInstr.set(PhysReg); - for (const unsigned *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) - UsedInInstr.set(Alias); + if (hasEarlyClobbers) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } } unsigned DefOpEnd = MI->getNumOperands(); @@ -752,13 +893,18 @@ void RAFast::AllocateBasicBlock() { DefOpEnd = VirtOpEnd; DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); + + // The imp-defs are skipped below, but we still need to mark those + // registers as used by the function. + SkippedInstrs.insert(&TID); } // Third scan. // Allocate defs and collect dead defs. for (unsigned i = 0; i != DefOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) + continue; unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -837,6 +983,14 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { // Make sure the set of used physregs is closed under subreg operations. MRI->closePhysRegsUsed(*TRI); + // Add the clobber lists for all the instructions we skipped earlier. 
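//===-- Editor's aside (illustrative sketch, not part of the patch) ------===//
// When spillAll() handled a call above, the def scan was cut short, so the
// call's implicit defs (its clobber list) never reached setPhysRegUsed. They
// are re-added from the stashed TargetInstrDescs because PrologEpilogInserter
// later decides callee-saved spills by querying, in effect:
//   bool mustSave = Fn.getRegInfo().isPhysRegUsed(SomeCalleeSavedReg);
// (SomeCalleeSavedReg is a placeholder name, not an LLVM API.)
//===----------------------------------------------------------------------===//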
+ for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator + I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) + if (const unsigned *Defs = (*I)->getImplicitDefs()) + while (*Defs) + MRI->setPhysRegUsed(*Defs++); + + SkippedInstrs.clear(); StackSlotForVirtReg.clear(); return true; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index bc331f0..044672d 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -83,7 +83,8 @@ namespace { // pressure, it can caused fewer GPRs to be held in the queue. static cl::opt<unsigned> NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember to skip."), + cl::desc("Number of registers for linearscan to remember" + "to skip."), cl::init(0), cl::Hidden); @@ -421,9 +422,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (vni->def != SlotIndex() && vni->isDefAccurate() && (CopyMI = li_->getInstructionFromIndex(vni->def)) && - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + (CopyMI->isCopy() || + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))) // Defined by a copy, try to extend SrcReg forward - CandReg = SrcReg; + CandReg = CopyMI->isCopy() ? CopyMI->getOperand(1).getReg() : SrcReg; else if (TrivCoalesceEnds && (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && @@ -992,6 +994,24 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) mri_->setRegAllocationHint(cur->reg, 0, Reg); } + } else if (CopyMI && CopyMI->isCopy()) { + DstReg = CopyMI->getOperand(0).getReg(); + DstSubReg = CopyMI->getOperand(0).getSubReg(); + SrcReg = CopyMI->getOperand(1).getReg(); + SrcSubReg = CopyMI->getOperand(1).getSubReg(); + unsigned Reg = 0; + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg) { + if (SrcSubReg) + Reg = tri_->getSubReg(Reg, SrcSubReg); + if (DstSubReg) + Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + mri_->setRegAllocationHint(cur->reg, 0, Reg); + } } } } @@ -1206,8 +1226,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector<LiveInterval*, 8> spillIs; std::vector<LiveInterval*> added; - - added = spiller_->spill(cur, spillIs); + spiller_->spill(cur, added, spillIs); std::sort(added.begin(), added.end(), LISorter()); addStackInterval(cur, ls_, li_, mri_, *vrm_); @@ -1285,10 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - std::vector<LiveInterval*> newIs; - newIs = spiller_->spill(sli, spillIs, &earliestStart); + spiller_->spill(sli, added, spillIs, &earliestStart); addStackInterval(sli, ls_, li_, mri_, *vrm_); - std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp deleted file mode 100644 index 321ae12..0000000 --- a/lib/CodeGen/RegAllocLocal.cpp +++ /dev/null @@ -1,1254 +0,0 @@ -//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University 
of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This register allocator allocates registers to a basic block at a time, -// attempting to keep values in registers and reusing registers as appropriate. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "llvm/BasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include <algorithm> -using namespace llvm; - -STATISTIC(NumStores, "Number of stores added"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumCopies, "Number of copies coalesced"); - -static RegisterRegAlloc - localRegAlloc("local", "local register allocator", - createLocalRegisterAllocator); - -namespace { - class RALocal : public MachineFunctionPass { - public: - static char ID; - RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} - private: - const TargetMachine *TM; - MachineFunction *MF; - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // StackSlotForVirtReg - Maps virtual regs to the frame index where these - // values are spilled. - IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; - - // Virt2PhysRegMap - This map contains entries for each virtual register - // that is currently available in a physical register. - IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; - - unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { - return Virt2PhysRegMap[VirtReg]; - } - - // PhysRegsUsed - This array is effectively a map, containing entries for - // each physical register that currently has a value (ie, it is in - // Virt2PhysRegMap). The value mapped to is the virtual register - // corresponding to the physical register (the inverse of the - // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned - // because it is used by a future instruction, and to -2 if it is not - // allocatable. If the entry for a physical register is -1, then the - // physical register is "not in the map". - // - std::vector<int> PhysRegsUsed; - - // PhysRegsUseOrder - This contains a list of the physical registers that - // currently have a virtual register value in them. This list provides an - // ordering of registers, imposing a reallocation order. This list is only - // used if all registers are allocated and we have to spill one, in which - // case we spill the least recently used register. Entries at the front of - // the list are the least recently used registers, entries at the back are - // the most recently used. - // - std::vector<unsigned> PhysRegsUseOrder; - - // Virt2LastUseMap - This maps each virtual register to its last use - // (MachineInstr*, operand index pair). 
- IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor> - Virt2LastUseMap; - - std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - return Virt2LastUseMap[Reg]; - } - - // VirtRegModified - This bitset contains information about which virtual - // registers need to be spilled back to memory when their registers are - // scavenged. If a virtual register has simply been rematerialized, there - // is no reason to spill it to memory when we need the register back. - // - BitVector VirtRegModified; - - // UsedInMultipleBlocks - Tracks whether a particular register is used in - // more than one block. - BitVector UsedInMultipleBlocks; - - void markVirtRegModified(unsigned Reg, bool Val = true) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - Reg -= TargetRegisterInfo::FirstVirtualRegister; - if (Val) - VirtRegModified.set(Reg); - else - VirtRegModified.reset(Reg); - } - - bool isVirtRegModified(unsigned Reg) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - assert(Reg - TargetRegisterInfo::FirstVirtualRegister < - VirtRegModified.size() && "Illegal virtual register!"); - return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; - } - - void AddToPhysRegsUseOrder(unsigned Reg) { - std::vector<unsigned>::iterator It = - std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg); - if (It != PhysRegsUseOrder.end()) - PhysRegsUseOrder.erase(It); - PhysRegsUseOrder.push_back(Reg); - } - - void MarkPhysRegRecentlyUsed(unsigned Reg) { - if (PhysRegsUseOrder.empty() || - PhysRegsUseOrder.back() == Reg) return; // Already most recently used - - for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) { - unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle - if (!areRegsEqual(Reg, RegMatch)) continue; - - PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); - // Add it to the end of the list - PhysRegsUseOrder.push_back(RegMatch); - if (RegMatch == Reg) - return; // Found an exact match, exit early - } - } - - public: - virtual const char *getPassName() const { - return "Local Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequiredID(PHIEliminationID); - AU.addRequiredID(TwoAddressInstructionPassID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - /// runOnMachineFunction - Register allocate the whole function - bool runOnMachineFunction(MachineFunction &Fn); - - /// AllocateBasicBlock - Register allocate the specified basic block. - void AllocateBasicBlock(MachineBasicBlock &MBB); - - - /// areRegsEqual - This method returns true if the specified registers are - /// related to each other. To do this, it checks to see if they are equal - /// or if the first register is in the alias set of the second register. - /// - bool areRegsEqual(unsigned R1, unsigned R2) const { - if (R1 == R2) return true; - for (const unsigned *AliasSet = TRI->getAliasSet(R2); - *AliasSet; ++AliasSet) { - if (*AliasSet == R1) return true; - } - return false; - } - - /// getStackSpaceFor - This returns the frame index of the specified virtual - /// register on the stack, allocating space if necessary. - int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); - - /// removePhysReg - This method marks the specified physical register as no - /// longer being in use. 
- /// - void removePhysReg(unsigned PhysReg); - - void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg, bool isKill); - - /// spillVirtReg - This method spills the value specified by PhysReg into - /// the virtual register slot specified by VirtReg. It then updates the RA - /// data structures to indicate the fact that PhysReg is now available. - /// - void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned VirtReg, unsigned PhysReg); - - /// spillPhysReg - This method spills the specified physical register into - /// the virtual register slot associated with it. If OnlyVirtRegs is set to - /// true, then the request is ignored if the physical register does not - /// contain a virtual register. - /// - void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs = false); - - /// assignVirtToPhysReg - This method updates local state so that we know - /// that PhysReg is the proper container for VirtReg now. The physical - /// register must not be used for anything else when this is called. - /// - void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); - - /// isPhysRegAvailable - Return true if the specified physical register is - /// free and available for use. This also includes checking to see if - /// aliased registers are all free... - /// - bool isPhysRegAvailable(unsigned PhysReg) const; - - /// getFreeReg - Look to see if there is a free register available in the - /// specified register class. If not, return 0. - /// - unsigned getFreeReg(const TargetRegisterClass *RC); - - /// getReg - Find a physical register to hold the specified virtual - /// register. If all compatible physical registers are used, this method - /// spills the last used virtual register to the stack, and uses that - /// register. If NoFree is true, that means the caller knows there isn't - /// a free register, do not call getFreeReg(). - unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg, bool NoFree = false); - - /// reloadVirtReg - This method transforms the specified virtual - /// register use to refer to a physical register. This method may do this - /// in one of several ways: if the register is available in a physical - /// register already, it uses that physical register. If the value is not - /// in a physical register, and if there are physical registers available, - /// it loads it into a register: PhysReg if that is an available physical - /// register, otherwise any physical register of the right class. - /// If register pressure is high, and it is possible, it tries to fold the - /// load of the virtual register into the instruction itself. It avoids - /// doing this if register pressure is low to improve the chance that - /// subsequent instructions can use the reloaded value. This method - /// returns the modified instruction. - /// - MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, SmallSet<unsigned, 4> &RRegs, - unsigned PhysReg); - - /// ComputeLocalLiveness - Computes liveness of registers within a basic - /// block, setting the killed/dead flags as appropriate. - void ComputeLocalLiveness(MachineBasicBlock& MBB); - - void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, - unsigned PhysReg); - }; - char RALocal::ID = 0; -} - -/// getStackSpaceFor - This allocates space for the specified virtual register -/// to be held on the stack. 
-int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { - // Find the location Reg would belong... - int SS = StackSlotForVirtReg[VirtReg]; - if (SS != -1) - return SS; // Already has space allocated? - - // Allocate a new stack object for this spill location... - int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); - - // Assign the slot. - StackSlotForVirtReg[VirtReg] = FrameIdx; - return FrameIdx; -} - - -/// removePhysReg - This method marks the specified physical register as no -/// longer being in use. -/// -void RALocal::removePhysReg(unsigned PhysReg) { - PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used - - std::vector<unsigned>::iterator It = - std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg); - if (It != PhysRegsUseOrder.end()) - PhysRegsUseOrder.erase(It); -} - -/// storeVirtReg - Store a virtual register to its assigned stack slot. -void RALocal::storeVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg, - bool isKill) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI); - ++NumStores; // Update statistics - - // Mark the spill instruction as last use if we're not killing the register. - if (!isKill) { - MachineInstr *Spill = llvm::prior(I); - int OpNum = Spill->findRegisterUseOperandIdx(PhysReg); - if (OpNum < 0) - getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0); - else - getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum); - } -} - -/// spillVirtReg - This method spills the value specified by PhysReg into the -/// virtual register slot specified by VirtReg. It then updates the RA data -/// structures to indicate the fact that PhysReg is now available. -/// -void RALocal::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg) { - assert(VirtReg && "Spilling a physical register is illegal!" - " Must not have appropriate kill for the register or use exists beyond" - " the intended one."); - DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg); - - if (!isVirtRegModified(VirtReg)) { - DEBUG(dbgs() << " which has not been modified, so no store necessary!"); - std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg); - if (LastUse.first) - LastUse.first->getOperand(LastUse.second).setIsKill(); - } else { - // Otherwise, there is a virtual register corresponding to this physical - // register. We only need to spill it into its stack slot if it has been - // modified. - // If the instruction reads the register that's spilled, (e.g. this can - // happen if it is a move to a physical register), then the spill - // instruction is not a kill. - bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); - storeVirtReg(MBB, I, VirtReg, PhysReg, isKill); - } - - getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - - DEBUG(dbgs() << '\n'); - removePhysReg(PhysReg); -} - - -/// spillPhysReg - This method spills the specified physical register into the -/// virtual register slot associated with it. If OnlyVirtRegs is set to true, -/// then the request is ignored if the physical register does not contain a -/// virtual register. 
-/// -void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs) { - if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! - assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); - if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) - spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); - return; - } - - // If the selected register aliases any other registers, we must make - // sure that one of the aliases isn't alive. - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. - PhysRegsUsed[*AliasSet] == -2) // If allocatable. - continue; - - if (PhysRegsUsed[*AliasSet]) - spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); - } -} - - -/// assignVirtToPhysReg - This method updates local state so that we know -/// that PhysReg is the proper container for VirtReg now. The physical -/// register must not be used for anything else when this is called. -/// -void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { - assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); - // Update information to note the fact that this register was just used, and - // it holds VirtReg. - PhysRegsUsed[PhysReg] = VirtReg; - getVirt2PhysRegMapSlot(VirtReg) = PhysReg; - AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg -} - - -/// isPhysRegAvailable - Return true if the specified physical register is free -/// and available for use. This also includes checking to see if aliased -/// registers are all free... -/// -bool RALocal::isPhysRegAvailable(unsigned PhysReg) const { - if (PhysRegsUsed[PhysReg] != -1) return false; - - // If the selected register aliases any other allocated registers, it is - // not free! - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? - return false; // Can't use this reg then. - return true; -} - - -/// getFreeReg - Look to see if there is a free register available in the -/// specified register class. If not, return 0. -/// -unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) { - // Get iterators defining the range of registers that are valid to allocate in - // this class, which also specifies the preferred allocation order. - TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); - - for (; RI != RE; ++RI) - if (isPhysRegAvailable(*RI)) { // Is reg unused? - assert(*RI != 0 && "Cannot use register!"); - return *RI; // Found an unused register! - } - return 0; -} - - -/// getReg - Find a physical register to hold the specified virtual -/// register. If all compatible physical registers are used, this method spills -/// the last used virtual register to the stack, and uses that register. -/// -unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned VirtReg, bool NoFree) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - - // First check to see if we have a free register of the requested type... - unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); - - if (PhysReg != 0) { - // Assign the register. - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; - } - - // If we didn't find an unused register, scavenge one now! 
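//===-- Editor's aside (illustrative sketch, not part of the patch) ------===//
// The allocator being deleted here chose spill victims by least-recently-used
// order: PhysRegsUseOrder keeps the least recently used register at the
// front, so the loop below scans from index 0. The policy in miniature
// (pickVictimLRU is a hypothetical name, not part of this file):
static unsigned pickVictimLRU(const std::vector<unsigned> &UseOrder,
                              const TargetRegisterClass *RC) {
  for (unsigned i = 0, e = UseOrder.size(); i != e; ++i)
    if (RC->contains(UseOrder[i])) // first compatible = least recently used
      return UseOrder[i];
  return 0; // caller asserts a compatible register always exists
}
//===----------------------------------------------------------------------===//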
- assert(!PhysRegsUseOrder.empty() && "No allocated registers??"); - - // Loop over all of the preallocated registers from the least recently used - // to the most recently used. When we find one that is capable of holding - // our register, use it. - for (unsigned i = 0; PhysReg == 0; ++i) { - assert(i != PhysRegsUseOrder.size() && - "Couldn't find a register of the appropriate class!"); - - unsigned R = PhysRegsUseOrder[i]; - - // We can only use this register if it holds a virtual register (ie, it - // can be spilled). Do not use it if it is an explicitly allocated - // physical register! - assert(PhysRegsUsed[R] != -1 && - "PhysReg in PhysRegsUseOrder, but is not allocated?"); - if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) { - // If the current register is compatible, use it. - if (RC->contains(R)) { - PhysReg = R; - break; - } - - // If one of the registers aliased to the current register is - // compatible, use it. - for (const unsigned *AliasIt = TRI->getAliasSet(R); - *AliasIt; ++AliasIt) { - if (!RC->contains(*AliasIt)) continue; - - // If this is pinned down for some reason, don't use it. For - // example, if CL is pinned, and we run across CH, don't use - // CH as justification for using scavenging ECX (which will - // fail). - if (PhysRegsUsed[*AliasIt] == 0) continue; - - // Make sure the register is allocatable. Don't allocate SIL on - // x86-32. - if (PhysRegsUsed[*AliasIt] == -2) continue; - - PhysReg = *AliasIt; // Take an aliased register - break; - } - } - } - - assert(PhysReg && "Physical register not assigned!?!?"); - - // At this point PhysRegsUseOrder[i] is the least recently used register of - // compatible register class. Spill it to memory and reap its remains. - spillPhysReg(MBB, I, PhysReg); - - // Now that we know which register we need to assign this to, do it now! - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; -} - - -/// reloadVirtReg - This method transforms the specified virtual -/// register use to refer to a physical register. This method may do this in -/// one of several ways: if the register is available in a physical register -/// already, it uses that physical register. If the value is not in a physical -/// register, and if there are physical registers available, it loads it into a -/// register: PhysReg if that is an available physical register, otherwise any -/// register. If register pressure is high, and it is possible, it tries to -/// fold the load of the virtual register into the instruction itself. It -/// avoids doing this if register pressure is low to improve the chance that -/// subsequent instructions can use the reloaded value. This method returns -/// the modified instruction. -/// -MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, - SmallSet<unsigned, 4> &ReloadedRegs, - unsigned PhysReg) { - unsigned VirtReg = MI->getOperand(OpNum).getReg(); - unsigned SubIdx = MI->getOperand(OpNum).getSubReg(); - - // If the virtual register is already available, just update the instruction - // and return. - if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - if (SubIdx) { - PR = TRI->getSubReg(PR, SubIdx); - MI->getOperand(OpNum).setSubReg(0); - } - MI->getOperand(OpNum).setReg(PR); // Assign the input register - if (!MI->isDebugValue()) { - // Do not do these for DBG_VALUE as they can affect codegen. - MarkPhysRegRecentlyUsed(PR); // Already have this value available! 
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); - } - return MI; - } - - // Otherwise, we need to fold it into the current instruction, or reload it. - // If we have registers available to hold the value, use them. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - // If we already have a PhysReg (this happens when the instruction is a - // reg-to-reg copy with a PhysReg destination) use that. - if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || - !isPhysRegAvailable(PhysReg)) - PhysReg = getFreeReg(RC); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - - if (PhysReg) { // Register is available, allocate it! - assignVirtToPhysReg(VirtReg, PhysReg); - } else { // No registers available. - // Force some poor hapless value out of the register file to - // make room for the new register, and reload it. - PhysReg = getReg(MBB, MI, VirtReg, true); - } - - markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - - DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"); - - // Add move instruction(s) - TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI); - ++NumLoads; // Update statistics - - MF->getRegInfo().setPhysRegUsed(PhysReg); - // Assign the input register. - if (SubIdx) { - MI->getOperand(OpNum).setSubReg(0); - MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx)); - } else - MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); - - if (!ReloadedRegs.insert(PhysReg)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (ReloadedRegs.insert(*SubRegs)) continue; - - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - - return MI; -} - -/// isReadModWriteImplicitKill - True if this is an implicit kill for a -/// read/mod/write register, i.e. update partial register. -static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - MO.isDef() && !MO.isDead()) - return true; - } - return false; -} - -/// isReadModWriteImplicitDef - True if this is an implicit def for a -/// read/mod/write register, i.e. update partial register. -static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - !MO.isDef() && MO.isKill()) - return true; - } - return false; -} - -// precedes - Helper function to determine with MachineInstr A -// precedes MachineInstr B within the same MBB. 
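//===-- Editor's aside (illustrative sketch, not part of the patch) ------===//
// precedes() below rescans the block from the top on every query, making the
// local liveness computation quadratic in block size. Numbering instructions
// once per block answers the same question in O(1) per query (sketch with
// assumed local names; not how any in-tree pass spells it):
DenseMap<const MachineInstr*, unsigned> Order;
unsigned N = 0;
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
     I != E; ++I)
  Order[&*I] = N++;
// then: A precedes B  <=>  Order[A] < Order[B]
//===----------------------------------------------------------------------===//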
-static bool precedes(MachineBasicBlock::iterator A, - MachineBasicBlock::iterator B) { - if (A == B) - return false; - - MachineBasicBlock::iterator I = A->getParent()->begin(); - while (I != A->getParent()->end()) { - if (I == A) - return true; - else if (I == B) - return false; - - ++I; - } - - return false; -} - -/// ComputeLocalLiveness - Computes liveness of registers within a basic -/// block, setting the killed/dead flags as appropriate. -void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { - // Keep track of the most recently seen previous use or def of each reg, - // so that we can update them with dead/kill markers. - DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); - // Uses don't trigger any flags, but we need to save - // them for later. Also, we have to process these - // _before_ processing the defs, since an instr - // uses regs before it defs them. - if (!MO.isReg() || !MO.getReg() || !MO.isUse()) - continue; - - // Ignore helpful kill flags from earlier passes. - MO.setIsKill(false); - - LastUseDef[MO.getReg()] = std::make_pair(I, i); - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; - - const unsigned *Aliases = TRI->getAliasSet(MO.getReg()); - if (Aliases == 0) - continue; - - while (*Aliases) { - DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator - alias = LastUseDef.find(*Aliases); - - if (alias != LastUseDef.end() && alias->second.first != I) - LastUseDef[*Aliases] = std::make_pair(I, i); - - ++Aliases; - } - } - - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); - // Defs others than 2-addr redefs _do_ trigger flag changes: - // - A def followed by a def is dead - // - A use followed by a def is a kill - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; - - unsigned SubIdx = MO.getSubReg(); - DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator - last = LastUseDef.find(MO.getReg()); - if (last != LastUseDef.end()) { - // Check if this is a two address instruction. If so, then - // the def does not kill the use. - if (last->second.first == I && I->isRegTiedToUseOperand(i)) - continue; - - MachineOperand &lastUD = - last->second.first->getOperand(last->second.second); - if (SubIdx && lastUD.getSubReg() != SubIdx) - // Partial re-def, the last def is not dead. - // %reg1024:5<def> = - // %reg1024:6<def> = - // or - // %reg1024:5<def> = op %reg1024, 5 - continue; - - if (lastUD.isDef()) - lastUD.setIsDead(true); - else - lastUD.setIsKill(true); - } - - LastUseDef[MO.getReg()] = std::make_pair(I, i); - } - } - - // Live-out (of the function) registers contain return values of the function, - // so we need to make sure they are alive at return time. - MachineBasicBlock::iterator Ret = MBB.getFirstTerminator(); - bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn()); - - if (BBEndsInReturn) - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) - if (!Ret->readsRegister(*I)) { - Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); - LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1); - } - - // Finally, loop over the final use/def of each reg - // in the block and determine if it is dead. 
- for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator - I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) { - MachineInstr *MI = I->second.first; - unsigned idx = I->second.second; - MachineOperand &MO = MI->getOperand(idx); - - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg()); - - // A crude approximation of "live-out" calculation - bool usedOutsideBlock = isPhysReg ? false : - UsedInMultipleBlocks.test(MO.getReg() - - TargetRegisterInfo::FirstVirtualRegister); - - // If the machine BB ends in a return instruction, then the value isn't used - // outside of the BB. - if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) { - // DBG_VALUE complicates this: if the only refs of a register outside - // this block are DBG_VALUE, we can't keep the reg live just for that, - // as it will cause the reg to be spilled at the end of this block when - // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that - // happens. - bool UsedByDebugValueOnly = false; - for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), - UE = MRI->reg_end(); UI != UE; ++UI) { - // Two cases: - // - used in another block - // - used in the same block before it is defined (loop) - if (UI->getParent() == &MBB && - !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) - continue; - - if (UI->isDebugValue()) { - UsedByDebugValueOnly = true; - continue; - } - - // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. - UsedInMultipleBlocks.set(MO.getReg() - - TargetRegisterInfo::FirstVirtualRegister); - usedOutsideBlock = true; - UsedByDebugValueOnly = false; - break; - } - - if (UsedByDebugValueOnly) - for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), - UE = MRI->reg_end(); UI != UE; ++UI) - if (UI->isDebugValue() && - (UI->getParent() != &MBB || - (MO.isDef() && precedes(&*UI, MI)))) - UI.getOperand().setReg(0U); - } - - // Physical registers and those that are not live-out of the block are - // killed/dead at their last use/def within this block. - if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) { - if (MO.isUse()) { - // Don't mark uses that are tied to defs as kills. - if (!MI->isRegTiedToDefOperand(idx)) - MO.setIsKill(true); - } else { - MO.setIsDead(true); - } - } - } -} - -void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { - // loop over each instruction - MachineBasicBlock::iterator MII = MBB.begin(); - - DEBUG({ - const BasicBlock *LBB = MBB.getBasicBlock(); - if (LBB) - dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); - }); - - // Add live-in registers as active. - for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) { - unsigned Reg = *I; - MF->getRegInfo().setPhysRegUsed(Reg); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } - - ComputeLocalLiveness(MBB); - - // Otherwise, sequentially allocate each instruction in the MBB. 
- while (MII != MBB.end()) { - MachineInstr *MI = MII++; - const TargetInstrDesc &TID = MI->getDesc(); - DEBUG({ - dbgs() << "\nStarting RegAlloc of: " << *MI; - dbgs() << " Regs have values: "; - for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { - if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i])) - dbgs() << "*"; - dbgs() << "[" << TRI->getName(i) - << ",%reg" << PhysRegsUsed[i] << "] "; - } - dbgs() << '\n'; - }); - - // Determine whether this is a copy instruction. The cases where the - // source or destination are phys regs are handled specially. - unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; - unsigned SrcCopyPhysReg = 0U; - bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopySubReg == DstCopySubReg; - if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) - SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); - - // Loop over the implicit uses, making sure that they are at the head of the - // use order list, so they don't get reallocated. - if (TID.ImplicitUses) { - for (const unsigned *ImplicitUses = TID.ImplicitUses; - *ImplicitUses; ++ImplicitUses) - MarkPhysRegRecentlyUsed(*ImplicitUses); - } - - SmallVector<unsigned, 8> Kills; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - - if (!MO.isImplicit()) - Kills.push_back(MO.getReg()); - else if (!isReadModWriteImplicitKill(MI, MO.getReg())) - // These are extra physical register kills when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - Kills.push_back(MO.getReg()); - } - - // If any physical regs are earlyclobber, spill any value they might - // have in them, then mark them unallocatable. - // If any virtual regs are earlyclobber, allocate them now (before - // freeing inputs that are killed). - if (MI->isInlineAsm()) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || - !MO.getReg()) - continue; - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) - DestPhysReg = getReg(MBB, MI, DestVirtReg); - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = - std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - if (unsigned DestSubIdx = MO.getSubReg()) { - MO.setSubReg(0); - DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); - } - MO.setReg(DestPhysReg); // Assign the earlyclobber register - } else { - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. 
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } - } - } - - // If a DBG_VALUE says something is located in a spilled register, - // change the DBG_VALUE to be undef, which prevents the register - // from being reloaded here. Doing that would change the generated - // code, unless another use immediately follows this instruction. - if (MI->isDebugValue() && - MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { - unsigned VirtReg = MI->getOperand(0).getReg(); - if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && - !getVirt2PhysRegMapSlot(VirtReg)) - MI->getOperand(0).setReg(0U); - } - - // Get the used operands into registers. This has the potential to spill - // incoming values if we are out of registers. Note that we completely - // ignore physical register uses here. We assume that if an explicit - // physical register is referenced by the instruction, that it is guaranteed - // to be live-in, or the input is badly hosed. - // - SmallSet<unsigned, 4> ReloadedRegs; - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); - // here we are looking for only used operands (never def&use) - if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, - isCopy ? DstCopyReg : 0); - } - - // If this instruction is the last user of this register, kill the - // value, freeing the register being used, so it doesn't need to be - // spilled to memory. - // - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned VirtReg = Kills[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // If the virtual register was never materialized into a register, it - // might not be in the map, but it won't hurt to zero it out anyway. - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. - continue; - } else { - assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && - "Silently clearing a virtual register?"); - } - - if (!PhysReg) continue; - - DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(dbgs() << " Last use of " - << TRI->getName(*SubRegs) << "[%reg" << VirtReg - <<"], removing it from live set\n"); - removePhysReg(*SubRegs); - } - } - } - - // Loop over all of the operands of the instruction, spilling registers that - // are defined, and marking explicit destinations in the PhysRegsUsed map. 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } - - // Loop over the implicit defs, spilling them as well. - if (TID.ImplicitDefs) { - for (const unsigned *ImplicitDefs = TID.ImplicitDefs; - *ImplicitDefs; ++ImplicitDefs) { - unsigned Reg = *ImplicitDefs; - if (PhysRegsUsed[Reg] != -2) { - spillPhysReg(MBB, MI, Reg, true); - AddToPhysRegsUseOrder(Reg); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - } - MF->getRegInfo().setPhysRegUsed(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } - } - - SmallVector<unsigned, 8> DeadDefs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead()) - DeadDefs.push_back(MO.getReg()); - } - - // Okay, we have allocated all of the source operands and spilled any values - // that would be destroyed by defs of this instruction. Loop over the - // explicit defs and assign them to a register, spilling incoming values if - // we need to scavenge a register. - // - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { - // If this is a copy try to reuse the input as the output; - // that will make the copy go away. - // If this is a copy, the source reg is a phys reg, and - // that reg is available, use that phys reg for DestPhysReg. - // If this is a copy, the source reg is a virtual reg, and - // the phys reg that was assigned to that virtual reg is now - // available, use that phys reg for DestPhysReg. (If it's now - // available that means this was the last use of the source.) 
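The comment block above spells out a three-way preference for the destination of a copy. Compressed into one helper it reads as below; this is an illustrative sketch, not the pass's interface, and isAvailable, dstClassContains, and scavenge are hypothetical callbacks standing in for isPhysRegAvailable, TargetRegisterClass::contains, and getReg:

// Preference order for a copy's destination register, as described above.
unsigned choosePhysRegForCopyDest(bool IsCopy, bool SrcIsPhys,
                                  unsigned SrcCopyReg, unsigned SrcCopyPhysReg,
                                  bool (*isAvailable)(unsigned),
                                  bool (*dstClassContains)(unsigned),
                                  unsigned (*scavenge)()) {
  if (IsCopy && SrcIsPhys && isAvailable(SrcCopyReg))
    return SrcCopyReg;        // reuse the physical source; the copy goes away
  if (IsCopy && !SrcIsPhys && SrcCopyPhysReg &&
      isAvailable(SrcCopyPhysReg) && dstClassContains(SrcCopyPhysReg))
    return SrcCopyPhysReg;    // reuse the reg the virtual source just vacated
  return scavenge();          // otherwise allocate normally
}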
- if (isCopy && - TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && - isPhysRegAvailable(SrcCopyReg)) { - DestPhysReg = SrcCopyReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else if (isCopy && - TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && - SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && - MF->getRegInfo().getRegClass(DestVirtReg)-> - contains(SrcCopyPhysReg)) { - DestPhysReg = SrcCopyPhysReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else - DestPhysReg = getReg(MBB, MI, DestVirtReg); - } - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - - if (unsigned DestSubIdx = MO.getSubReg()) { - MO.setSubReg(0); - DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); - } - MO.setReg(DestPhysReg); // Assign the output register - } - - // If this instruction defines any registers that are immediately dead, - // kill them now. - // - for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { - unsigned VirtReg = DeadDefs[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - assert(PhysReg != 0); - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. - continue; - } else if (!PhysReg) - continue; - - DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"); - removePhysReg(*AliasSet); - } - } - } - - // If this instruction is a call, make sure there are no dirty registers. The - // call might throw an exception, and the landing pad expects to find all - // registers in stack slots. - if (TID.isCall()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { - if (PhysRegsUsed[i] <= 0) continue; - unsigned VirtReg = PhysRegsUsed[i]; - if (!isVirtRegModified(VirtReg)) continue; - DEBUG(dbgs() << " Storing dirty %reg" << VirtReg); - storeVirtReg(MBB, MI, VirtReg, i, false); - markVirtRegModified(VirtReg, false); - DEBUG(dbgs() << " because the call might throw\n"); - } - - // Finally, if this is a noop copy instruction, zap it. (Except that if - // the copy is dead, it must be kept to avoid messing up liveness info for - // the register scavenger. See pr4100.) - if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg && - DeadDefs.empty()) { - ++NumCopies; - MBB.erase(MI); - } - } - - MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); - - // Spill all physical registers holding virtual registers now. - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { - if (unsigned VirtReg = PhysRegsUsed[i]) - spillVirtReg(MBB, MI, VirtReg, i); - else - removePhysReg(i); - } - -#if 0 - // This checking code is very expensive. 
- bool AllOk = true;
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (unsigned PR = Virt2PhysRegMap[i]) {
- cerr << "Register still mapped: " << i << " -> " << PR << "\n";
- AllOk = false;
- }
- assert(AllOk && "Virtual registers still in phys regs?");
-#endif
-
- // Clear any physical register which appear live at the end of the basic
- // block, but which do not hold any virtual registers. e.g., the stack
- // pointer.
- PhysRegsUseOrder.clear();
-}
-
-/// runOnMachineFunction - Register allocate the whole function
-///
-bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
- DEBUG(dbgs() << "Machine Function\n");
- MF = &Fn;
- MRI = &Fn.getRegInfo();
- TM = &Fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
-
- PhysRegsUsed.assign(TRI->getNumRegs(), -1);
-
- // At various places we want to efficiently check to see whether a register
- // is allocatable. To handle this, we mark all unallocatable registers as
- // being pinned down, permanently.
- {
- BitVector Allocable = TRI->getAllocatableSet(Fn);
- for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
- if (!Allocable[i])
- PhysRegsUsed[i] = -2; // Mark the reg unallocable.
- }
-
- // initialize the virtual->physical register map to have a 'null'
- // mapping for all virtual registers
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
- StackSlotForVirtReg.grow(LastVirtReg);
- Virt2PhysRegMap.grow(LastVirtReg);
- Virt2LastUseMap.grow(LastVirtReg);
- VirtRegModified.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
- UsedInMultipleBlocks.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
-
- // Loop over all of the basic blocks, eliminating virtual register references
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB)
- AllocateBasicBlock(*MBB);
-
- StackSlotForVirtReg.clear();
- PhysRegsUsed.clear();
- VirtRegModified.clear();
- UsedInMultipleBlocks.clear();
- Virt2PhysRegMap.clear();
- Virt2LastUseMap.clear();
- return true;
-}
-
-FunctionPass *llvm::createLocalRegisterAllocator() {
- return new RALocal();
-}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 4fafd28..7e61a12 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -396,28 +396,23 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
 if (srcRegIsPhysical && dstRegIsPhysical)
 continue;
- // If it's a copy that includes a virtual register but the source and
- // destination classes differ then we can't coalesce, so continue with
- // the next instruction.
- const TargetRegisterClass *srcRegClass = srcRegIsPhysical ?
- tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg);
-
- const TargetRegisterClass *dstRegClass = dstRegIsPhysical ?
- tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg);
-
- if (srcRegClass != dstRegClass)
+ // If it's a copy that includes two virtual registers but the source and
+ // destination classes differ then we can't coalesce.
+ if (!srcRegIsPhysical && !dstRegIsPhysical &&
+ mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
 continue;
- // We also need any physical regs to be allocable, coalescing with
- // a non-allocable register is invalid.
- if (srcRegIsPhysical) {
+ // If one is physical and one is virtual, check that the physical is
+ // allocatable in the class of the virtual.
+ if (srcRegIsPhysical && !dstRegIsPhysical) { + const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg); if (std::find(dstRegClass->allocation_order_begin(*mf), dstRegClass->allocation_order_end(*mf), srcReg) == dstRegClass->allocation_order_end(*mf)) continue; } - - if (dstRegIsPhysical) { + if (!srcRegIsPhysical && dstRegIsPhysical) { + const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg); if (std::find(srcRegClass->allocation_order_begin(*mf), srcRegClass->allocation_order_end(*mf), dstReg) == srcRegClass->allocation_order_end(*mf)) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1131e3d..ab0bc2d 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -16,6 +16,8 @@ #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Pass.h" @@ -33,6 +35,160 @@ char RegisterCoalescer::ID = 0; // RegisterCoalescer::~RegisterCoalescer() {} +unsigned CoalescerPair::compose(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return tri_.composeSubRegIndices(a, b); +} + +bool CoalescerPair::isMoveInstr(const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) const { + if (MI->isCopy()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = MI->getOperand(1).getSubReg(); + } else if (MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) { + return false; + } + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + srcReg_ = dstReg_ = subIdx_ = 0; + newRC_ = 0; + flipped_ = crossClass_ = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + partial_ = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. + if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + flipped_ = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = tri_.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + SrcSub = 0; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + + // Both registers have subreg indices. + if (SrcSub && DstSub) { + // For now we only handle the case of identical indices in commensurate + // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg + // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. 
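The physical-vs-virtual guard above is a membership test: the fixed register must appear in the allocation order of the virtual register's class. The same test in isolation, with a toy RegClass standing in for TargetRegisterClass and its allocation_order_begin/end pair:

#include <algorithm>
#include <vector>

struct RegClass { std::vector<unsigned> AllocationOrder; };

// A physical register can only be coalesced into a virtual register's class
// if the allocator could have chosen it for that class in the first place.
bool isAllocatableIn(unsigned PhysReg, const RegClass &RC) {
  return std::find(RC.AllocationOrder.begin(), RC.AllocationOrder.end(),
                   PhysReg) != RC.AllocationOrder.end();
}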
+ if (SrcSub != DstSub)
+ return false;
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (!getCommonSubClass(DstRC, SrcRC))
+ return false;
+ SrcSub = DstSub = 0;
+ }
+
+ // There can be no SrcSub.
+ if (SrcSub) {
+ std::swap(Src, Dst);
+ DstSub = SrcSub;
+ SrcSub = 0;
+ assert(!flipped_ && "Unexpected flip");
+ flipped_ = true;
+ }
+
+ // Find the new register class.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (DstSub)
+ newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ else
+ newRC_ = getCommonSubClass(DstRC, SrcRC);
+ if (!newRC_)
+ return false;
+ crossClass_ = newRC_ != DstRC || newRC_ != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ srcReg_ = Src;
+ dstReg_ = Dst;
+ subIdx_ = DstSub;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_))
+ return false;
+ std::swap(srcReg_, dstReg_);
+ flipped_ = !flipped_;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is srcReg_.
+ if (Dst == srcReg_) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != srcReg_) {
+ return false;
+ }
+
+ // Now check that Dst matches dstReg_.
+ if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!subIdx_ && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = tri_.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return dstReg_ == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return tri_.getSubReg(dstReg_, SrcSub) == Dst;
+ } else {
+ // dstReg_ is virtual.
+ if (dstReg_ != Dst)
+ return false;
+ // Registers match; do the subregisters line up?
+ return compose(subIdx_, SrcSub) == DstSub;
+ }
+}
+
 // Because of the way .a files work, we must force the SimpleRC
 // implementation to be pulled in if the RegisterCoalescer classes are
 // pulled in. Otherwise we run the risk of RegisterCoalescer being
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 690e59f..43b3fb6 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -141,6 +141,10 @@ void RegScavenger::forward() {
 // Find out which registers are early clobbered, killed, defined, and marked
 // def-dead in this instruction.
+ // FIXME: The scavenger is not predication aware. If the instruction is
+ // predicated, conservatively assume "kill" markers do not actually kill the
+ // register. Similarly, ignore "dead" markers.
+ bool isPred = TII->isPredicated(MI);
 BitVector EarlyClobberRegs(NumPhysRegs);
 BitVector KillRegs(NumPhysRegs);
 BitVector DefRegs(NumPhysRegs);
@@ -155,11 +159,11 @@ void RegScavenger::forward() {
 if (MO.isUse()) {
 // Two-address operands implicitly kill.
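CoalescerPair::compose above treats index 0 as the identity on both sides, so only genuinely nested sub-register indices ever reach the target hook. The same shape in isolation, with composeIndices as a stand-in for TargetRegisterInfo::composeSubRegIndices:

// Compose two sub-register indices, where 0 means "the whole register".
unsigned composeSubIdx(unsigned A, unsigned B,
                       unsigned (*composeIndices)(unsigned, unsigned)) {
  if (!A) return B;              // no outer index: the result is just B
  if (!B) return A;              // no inner index: the result is just A
  return composeIndices(A, B);   // both set: ask the target to fold them
}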
- if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+ if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
 addRegWithSubRegs(KillRegs, Reg);
 } else {
 assert(MO.isDef());
- if (MO.isDead())
+ if (!isPred && MO.isDead())
 addRegWithSubRegs(DeadRegs, Reg);
 else
 addRegWithSubRegs(DefRegs, Reg);
@@ -238,8 +242,18 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
 return 0;
 }
+/// getRegsAvailable - Return, in Mask, all available registers in the
+/// register class.
+void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC,
+ BitVector &Mask) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+}
+
 /// findSurvivorReg - Return the candidate register that is unused for the
-/// longest after MBBI. UseMI is set to the instruction where the search
+/// longest after StartMI. UseMI is set to the instruction where the search
 /// stopped.
 ///
 /// No more than InstrLimit instructions are inspected.
@@ -258,6 +272,10 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
 bool inVirtLiveRange = false;
 for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
 bool isVirtKillInsn = false;
 bool isVirtDefInsn = false;
 // Remove any candidates touched by instruction.
@@ -321,13 +339,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
 Candidates.reset(MO.getReg());
 }
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill.
+ if ((Candidates & RegsAvailable).any())
+ Candidates &= RegsAvailable;
+
 // Find the register whose use is furthest away.
 MachineBasicBlock::iterator UseMI;
 unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
- // If we found an unused register there is no reason to spill it. We have
- // probably found a callee-saved register that has been saved in the
- // prologue, but happens to be unused at this point.
+ // If we found an unused register there is no reason to spill it.
 if (!isAliasUsed(SReg))
 return SReg;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index da20c12..7d39dc4 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -380,26 +380,26 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
 }
 #endif
-/// InitDAGTopologicalSorting - create the initial topological
+/// InitDAGTopologicalSorting - create the initial topological
 /// ordering from the DAG to be scheduled.
 ///
-/// The idea of the algorithm is taken from
+/// The idea of the algorithm is taken from
 /// "Online algorithms for managing the topological order of
 /// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
-/// This is the MNR algorithm, which was first introduced by
-/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
 /// "Maintaining a topological order under edge insertions".
 ///
-/// Short description of the algorithm:
+/// Short description of the algorithm:
 ///
 /// Topological ordering, ord, of a DAG maps each node to a topological
 /// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
 ///
-/// This means that if there is a path from the node X to the node Z,
+/// This means that if there is a path from the node X to the node Z,
 /// then ord(X) < ord(Z).
/// /// This property can be used to check for reachability of nodes: -/// if Z is reachable from X, then an insertion of the edge Z->X would +/// if Z is reachable from X, then an insertion of the edge Z->X would /// create a cycle. /// /// The algorithm first computes a topological ordering for the DAG by @@ -431,7 +431,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { // Collect leaf nodes. WorkList.push_back(SU); } - } + } int Id = DAGSize; while (!WorkList.empty()) { @@ -456,7 +456,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { SUnit *SU = &SUnits[i]; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && + assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && "Wrong topological sorting"); } } @@ -494,7 +494,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, bool& HasLoop) { std::vector<const SUnit*> WorkList; - WorkList.reserve(SUnits.size()); + WorkList.reserve(SUnits.size()); WorkList.push_back(SU); do { @@ -504,20 +504,20 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, for (int I = SU->Succs.size()-1; I >= 0; --I) { int s = SU->Succs[I].getSUnit()->NodeNum; if (Node2Index[s] == UpperBound) { - HasLoop = true; + HasLoop = true; return; } // Visit successors if not already and in affected region. if (!Visited.test(s) && Node2Index[s] < UpperBound) { WorkList.push_back(SU->Succs[I].getSUnit()); - } - } + } + } } while (!WorkList.empty()); } -/// Shift - Renumber the nodes so that the topological ordering is +/// Shift - Renumber the nodes so that the topological ordering is /// preserved. -void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, int UpperBound) { std::vector<int> L; int shift = 0; @@ -568,7 +568,7 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, // Is Ord(TargetSU) < Ord(SU) ? if (LowerBound < UpperBound) { Visited.reset(); - // There may be a path from TargetSU to SU. Check for it. + // There may be a path from TargetSU to SU. Check for it. DFS(TargetSU, UpperBound, HasLoop); } return HasLoop; @@ -580,8 +580,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) { Index2Node[index] = n; } -ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort( - std::vector<SUnit> &sunits) - : SUnits(sunits) {} +ScheduleDAGTopologicalSort:: +ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {} ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index ee08e1d..0a2fb37 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -50,11 +50,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, break; } } - bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second, - SU->CopyDstRC, SU->CopySrcRC, - DebugLoc()); - (void)Success; - assert(Success && "copyRegToReg failed!"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); } else { // Copy from physical register. assert(I->getReg() && "Unknown physical register!"); @@ -62,11 +59,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; isNew = isNew; // Silence compiler warning. 
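The ScheduleDAGTopologicalSort comments above rest on one invariant: ord(X) < ord(Y) for every edge X->Y, so Z can only be reachable from X when ord(X) < ord(Z), and a search may prune every node whose index exceeds ord(Z). A toy reachability check built on that bound, using plain adjacency lists and an ord array instead of the SUnit machinery:

#include <vector>

bool reachable(unsigned X, unsigned Z,
               const std::vector<std::vector<unsigned>> &Succs,
               const std::vector<int> &Ord) {
  if (Ord[X] >= Ord[Z])
    return false;                          // the invariant rules it out
  std::vector<unsigned> Work{X};
  std::vector<bool> Seen(Succs.size(), false);
  while (!Work.empty()) {
    unsigned N = Work.back();
    Work.pop_back();
    if (N == Z)
      return true;
    for (unsigned S : Succs[N])
      if (!Seen[S] && Ord[S] <= Ord[Z]) {  // prune by the topological bound
        Seen[S] = true;
        Work.push_back(S);
      }
  }
  return false;
}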
assert(isNew && "Node emitted out of order - early"); - bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(), - SU->CopyDstRC, SU->CopySrcRC, - DebugLoc()); - (void)Success; - assert(Success && "copyRegToReg failed!"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); } break; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index ad82db2..d90659b 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -69,8 +69,10 @@ namespace llvm { const SmallSet<unsigned, 8> &LoopLiveIns) { unsigned Count = 0; for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I, ++Count) { + I != E; ++I) { const MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) @@ -79,6 +81,7 @@ namespace llvm { if (LoopLiveIns.count(MOReg)) Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); } + ++Count; // Not every iteration due to dbg_value above. } const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 0cfd5e1..799988a 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMSelectionDAG - CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp FunctionLoweringInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bddd78..e671752 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -211,6 +211,7 @@ namespace { SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -668,7 +669,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, dl, PVT, + return DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -890,7 +891,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -1079,6 +1080,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -1313,7 +1315,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A @@ -1550,7 +1552,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -2028,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. @@ -2038,7 +2040,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Avoid infinite looping with PromoteIntBinOp. (N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || - (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { @@ -2193,7 +2198,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2216,7 +2221,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2250,7 +2255,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2286,7 +2291,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2317,7 +2322,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); } @@ -2425,6 +2431,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) return SDValue(Rot, 0); + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -3158,6 +3169,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSRL; } + // Attempt to convert a srl of a load into a narrower zero-extending load. + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + // Here is a common situation. We want to optimize: // // %a = ... @@ -3487,7 +3503,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3531,7 +3547,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3557,24 +3573,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. 
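The SimplifyDemandedBits call added to visitOR above exploits a bitwise fact: a bit of one OR operand is irrelevant wherever the other operand is known to be 1. A one-line model of that demanded-bits step; KnownOneB would come from value tracking, here it is simply a parameter:

#include <cstdint>

// Which bits of A still matter in (A | B), given the bits demanded of the
// result and the bits proven set in B.
uint64_t demandedBitsOfA(uint64_t Demanded, uint64_t KnownOneB) {
  return Demanded & ~KnownOneB;   // B's known ones mask A's bits out
}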
if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -3635,10 +3651,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && - (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), - N0.getValueType()) || - !TLI.isZExtFree(N0.getValueType(), VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3679,7 +3692,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3723,7 +3736,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3881,7 +3894,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3925,8 +3938,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), - VT, LN0->getChain(), LN0->getBasePtr(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, + N->getDebugLoc(), + 
LN0->getChain(), LN0->getBasePtr(),
 LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT,
 LN0->isVolatile(), LN0->isNonTemporal(),
@@ -3950,24 +3964,24 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
 // we know that the element size of the sext'd result matches the
 // element size of the compare operands.
 if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
 // If the desired elements are smaller or larger than the source
 // elements we can use a matching integer vector type and then
 // truncate/sign extend
 else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
 }
 }
@@ -4024,6 +4038,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
 /// extended, also fold the extension to form an extending load.
 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
 unsigned Opc = N->getOpcode();
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
 SDValue N0 = N->getOperand(0);
 EVT VT = N->getValueType(0);
@@ -4040,6 +4055,15 @@
 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
 if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
 return SDValue();
+ } else if (Opc == ISD::SRL) {
+ // Another special case: SRL is basically zero-extending a narrower
+ // value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
 }
 unsigned EVTBits = ExtVT.getSizeInBits();
@@ -4085,7 +4109,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
 ?
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) - : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, + : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); @@ -4172,7 +4196,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4189,7 +4213,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4243,8 +4267,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { + SDValue Reduced = ReduceLoadWidth(N); + if (Reduced.getNode()) + return Reduced; + } + + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4943,7 +4976,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -5527,8 +5560,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), - LD->getValueType(0), + return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + N->getDebugLoc(), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); @@ -5551,8 +5584,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), - LD->getValueType(0), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), @@ -6077,7 +6110,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check if the result type doesn't match the inserted element type. 
A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. - EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { @@ -6277,8 +6309,6 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { - return SDValue(); - EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -6334,6 +6364,59 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { + if (!TLI.getShouldFoldAtomicFences()) + return SDValue(); + + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)), atomic.getResNo()); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)), + atomic.getResNo()); + default: + return SDValue(); + } +} + /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -6565,8 +6648,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), - TheSelect->getDebugLoc(), TheSelect->getValueType(0), + TheSelect->getDebugLoc(), LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), @@ -6807,38 +6890,34 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X -> + // Check to see if this is an integer abs. 
+ // select_cc setg[te] X, 0, X, -X -> + // select_cc setgt X, -1, X, -X -> + // select_cc setl[te] X, 0, -X, X -> + // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && - N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && - N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { + if (N1C) { + ConstantSDNode *SubC = NULL; + if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (N1C->isAllOnesValue() && CC == ISD::SETGT)) && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); + else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || + (N1C->isOne() && CC == ISD::SETLT)) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); + EVT XType = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, - N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } - // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X -> - // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && - N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { - if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { - EVT XType = N0.getValueType(); - if (SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, - N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), - XType, N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } + if (SubC && SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 95f4d07..3f7e4a5 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -44,18 +44,38 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" -#include "FunctionLoweringInfo.h" using namespace llvm; +/// startNewBlock - Set the current block to which generated machine +/// instructions will be appended, and clear the local CSE map. 
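The select_cc patterns above all denote |X|, and the replacement sequence Y = sra(X, size(X)-1); xor(add(X, Y), Y) is the classic branchless absolute value. A worked version in plain C++, for illustration only; the arithmetic right shift on a negative int is implementation-defined before C++20, though universally arithmetic in practice, and like any two's-complement abs it maps INT32_MIN to itself:

#include <cstdint>

int32_t absBranchless(int32_t X) {
  int32_t Y = X >> 31;     // 0 when X >= 0, all ones (-1) when X < 0
  return (X + Y) ^ Y;      // X when Y == 0; ~(X - 1) == -X when Y == -1
}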
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+ // Start out as null, meaning no local-value instructions have
+ // been emitted.
+ LastLocalValue = 0;
+
+ // Advance the last local value past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ LastLocalValue = I;
+ ++I;
+ }
+}
+
 bool FastISel::hasTrivialKill(const Value *V) const {
 // Don't consider constants or arguments to have trivial kills.
 const Instruction *I = dyn_cast<Instruction>(V);
@@ -99,25 +119,31 @@ unsigned FastISel::getRegForValue(const Value *V) {
 // cache values defined by Instructions across blocks, and other values
 // only locally. This is because Instructions already have the SSA
 // def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V);
- if (I != ValueMap.end())
- return I->second;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end()) {
+ unsigned Reg = I->second;
+ return Reg;
+ }
 unsigned Reg = LocalValueMap[V];
 if (Reg != 0)
 return Reg;
 // In bottom-up mode, just create the virtual register which will be used
 // to hold the value. It will be materialized later.
- if (IsBottomUp) {
- Reg = createResultReg(TLI.getRegClassFor(VT));
- if (isa<Instruction>(V))
- ValueMap[V] = Reg;
- else
- LocalValueMap[V] = Reg;
- return Reg;
- }
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
- return materializeRegForValue(V, VT);
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
 }
 /// materializeRegForValue - Helper for getRegForValue. This function is
@@ -161,11 +187,15 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
 }
 }
 } else if (const Operator *Op = dyn_cast<Operator>(V)) {
- if (!SelectOperator(Op, Op->getOpcode())) return 0;
- Reg = LocalValueMap[Op];
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
 } else if (isa<UndefValue>(V)) {
 Reg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
 }
 // If target-independent code couldn't handle the value, give target-specific
@@ -175,8 +205,10 @@
 // Don't cache constant materializations in the general ValueMap.
 // To do so would require tracking what uses they dominate.
- if (Reg != 0)
+ if (Reg != 0) {
 LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
 return Reg;
 }
@@ -185,8 +217,9 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
 // cache values defined by Instructions across blocks, and other values
 // only locally. This is because Instructions already have the SSA
 // def-dominates-use requirement enforced.
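getRegForValue above consults two maps: the function-wide FuncInfo.ValueMap for values whose SSA def dominates all uses, and the block-local LocalValueMap that startNewBlock clears. A toy two-level lookup capturing that split, with opaque const void* keys standing in for Value*:

#include <unordered_map>

struct ValueRegMaps {
  std::unordered_map<const void *, unsigned> FuncMap;   // survives blocks
  std::unordered_map<const void *, unsigned> LocalMap;  // cleared per block

  unsigned lookup(const void *V) const {
    auto I = FuncMap.find(V);
    if (I != FuncMap.end())
      return I->second;
    auto J = LocalMap.find(V);
    return J != LocalMap.end() ? J->second : 0;         // 0 means unassigned
  }
  void startNewBlock() { LocalMap.clear(); }
};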
- if (ValueMap.count(V)) - return ValueMap[V]; + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) + return I->second; return LocalValueMap[V]; } @@ -202,14 +235,17 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { return Reg; } - unsigned &AssignedReg = ValueMap[I]; + unsigned &AssignedReg = FuncInfo.ValueMap[I]; if (AssignedReg == 0) + // Use the new register. AssignedReg = Reg; else if (Reg != AssignedReg) { - const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); - TII.copyRegToReg(*MBB, MBB->end(), AssignedReg, - Reg, RegClass, RegClass, DL); + // Arrange for uses of AssignedReg to be replaced by uses of Reg. + FuncInfo.RegFixups[AssignedReg] = Reg; + + AssignedReg = Reg; } + return AssignedReg; } @@ -237,6 +273,33 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { return std::pair<unsigned, bool>(IdxN, IdxNIsKill); } +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning. + while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +MachineBasicBlock::iterator FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + recomputeInsertPt(); + return OldInsertPt; +} + +void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = llvm::prior(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt; +} + /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// @@ -345,7 +408,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); @@ -395,7 +458,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!DIVariable(DI->getVariable()).Verify() || - !MF.getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) return true; const Value *Address = DI->getAddress(); @@ -409,11 +472,12 @@ bool FastISel::SelectCall(const User *I) { // those are handled in SelectionDAGBuilder. if (AI) { DenseMap<const AllocaInst*, int>::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; if (!DI->getDebugLoc().isUnknown()) - MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); } else // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. @@ -428,23 +492,28 @@ bool FastISel::SelectCall(const User *I) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
- BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addFPImm(CF).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. // Insert an undef so we can see what we dropped. - BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } return true; } @@ -453,14 +522,13 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { default: break; case TargetLowering::Expand: { - assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); unsigned Reg = TLI.getExceptionAddressRegister(); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); UpdateValueMap(I, ResultReg); return true; } @@ -472,25 +540,23 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - if (MBB->isLandingPad()) - AddCatchInfo(*cast<CallInst>(I), &MF.getMMI(), MBB); + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); else { #ifndef NDEBUG - CatchInfoLost.insert(cast<CallInst>(I)); + FuncInfo.CatchInfoLost.insert(cast<CallInst>(I)); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. 
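
// Illustrative sketch (editorial aside, not part of the patch): the four
// BuildMI calls above all emit one DBG_VALUE shape -- (value, offset,
// variable) -- differing only in how the value operand is encoded. The
// dispatch schematically, with an invented Operand type standing in for
// MachineOperand:
struct Operand {
  enum Kind { UndefReg, Imm, FPImm, Reg } K;
  unsigned long long Val;
};

Operand dbgValueOperand(bool IsConstInt, unsigned long long CInt,
                        bool IsConstFP, unsigned KnownReg) {
  if (IsConstInt) return {Operand::Imm, CInt};     // ConstantInt case
  if (IsConstFP)  return {Operand::FPImm, 0};      // ConstantFP case
  if (KnownReg)   return {Operand::Reg, KnownReg}; // value already in a vreg
  return {Operand::UndefReg, 0}; // can't materialize: visible undef marker
}
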
unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) MBB->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } unsigned Reg = TLI.getExceptionSelectorRegister(); EVT SrcVT = TLI.getPointerTy(); const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, - RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); bool ResultRegIsKill = hasTrivialKill(I); @@ -605,12 +671,12 @@ bool FastISel::SelectBitCast(const User *I) { if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); - ResultReg = createResultReg(DstClass); - - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Op0, DstClass, SrcClass, DL); - if (!InsertedCopy) - ResultReg = 0; + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0); + } } // If the reg-reg copy failed, select a BIT_CONVERT opcode. @@ -655,14 +721,15 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { - if (MBB->isLayoutSuccessor(MSucc)) { +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { + if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // The unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. - TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>()); + TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, + SmallVector<MachineOperand, 0>(), DL); } - MBB->addSuccessor(MSucc); + FuncInfo.MBB->addSuccessor(MSucc); } /// SelectFNeg - Emit an FNeg operation. @@ -712,8 +779,39 @@ FastISel::SelectFNeg(const User *I) { } bool +FastISel::SelectLoad(const User *I) { + LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I)); + + // For a load from an alloca, make a limited effort to find the value + // already available in a register, avoiding redundant loads. 
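
// Illustrative sketch (editorial aside, not part of the patch): the spirit
// of the reuse scan implemented just below. Walk backwards from the load; a
// prior access to the same slot supplies the value, and any other store
// forces a bail-out since it might alias. The real FindAvailableLoadedValue
// performs proper alias checks rather than this pointer-equality toy.
#include <vector>

struct MemOp { bool IsStore; const void *Addr; unsigned ValueReg; };

unsigned findAvailableValue(const std::vector<MemOp> &Block, size_t LoadIdx,
                            const void *Addr) {
  for (size_t i = LoadIdx; i-- != 0;) {
    if (Block[i].Addr == Addr)
      return Block[i].ValueReg; // prior store to / load from the same slot
    if (Block[i].IsStore)
      return 0;                 // unknown store: conservatively reload
  }
  return 0;
}
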
+ if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) { + BasicBlock::iterator ScanFrom = LI; + if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(), + LI->getParent(), ScanFrom)) { + if (!V->use_empty() && + (!isa<Instruction>(V) || + cast<Instruction>(V)->getParent() == LI->getParent() || + (isa<AllocaInst>(V) && + FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) && + (!isa<Argument>(V) || + LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) { + unsigned ResultReg = getRegForValue(V); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + } + } + } + + return false; +} + +bool FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { + case Instruction::Load: + return SelectLoad(I); case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); case Instruction::FAdd: @@ -762,8 +860,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { if (BI->isUnconditional()) { const BasicBlock *LLVMSucc = BI->getSuccessor(0); - MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; - FastEmitBranch(MSucc); + MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc]; + FastEmitBranch(MSucc, BI->getDebugLoc()); return true; } @@ -778,7 +876,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { case Instruction::Alloca: // FunctionLowering has the static-sized case covered. - if (StaticAllocaMap.count(cast<AllocaInst>(I))) + if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I))) return true; // Dynamic-sized alloca is not handled yet. @@ -824,32 +922,16 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am, - std::vector<std::pair<MachineInstr*, unsigned> > &pn -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> &cil -#endif - ) - : MBB(0), - ValueMap(vm), - MBBMap(bm), - StaticAllocaMap(am), - PHINodesToUpdate(pn), -#ifndef NDEBUG - CatchInfoLost(cil), -#endif - MF(mf), - MRI(MF.getRegInfo()), - MFI(*MF.getFrameInfo()), - MCP(*MF.getConstantPool()), - TM(MF.getTarget()), +FastISel::FastISel(FunctionLoweringInfo &funcInfo) + : FuncInfo(funcInfo), + MRI(FuncInfo.MF->getRegInfo()), + MFI(*FuncInfo.MF->getFrameInfo()), + MCP(*FuncInfo.MF->getConstantPool()), + TM(FuncInfo.MF->getTarget()), TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - IsBottomUp(false) { + TRI(*TM.getRegisterInfo()) { } FastISel::~FastISel() {} @@ -978,7 +1060,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(MBB, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; } @@ -989,13 +1071,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, 
FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1009,17 +1091,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1032,17 +1112,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1055,17 +1133,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1079,19 +1155,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1103,13 +1177,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); else { - BuildMI(MBB, DL, II).addImm(Imm); - 
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC, DL);
-    if (!InsertedCopy)
-      ResultReg = 0;
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
 
@@ -1117,24 +1189,12 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
 unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                               unsigned Op0, bool Op0IsKill,
                                               uint32_t Idx) {
-  const TargetRegisterClass* RC = MRI.getRegClass(Op0);
-  
   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
-  const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
-  
-  if (II.getNumDefs() >= 1)
-    BuildMI(MBB, DL, II, ResultReg)
-      .addReg(Op0, Op0IsKill * RegState::Kill)
-      .addImm(Idx);
-  else {
-    BuildMI(MBB, DL, II)
-      .addReg(Op0, Op0IsKill * RegState::Kill)
-      .addImm(Idx);
-    bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
-                                         II.ImplicitDefs[0], RC, RC, DL);
-    if (!InsertedCopy)
-      ResultReg = 0;
-  }
+  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+         "Cannot yet extract from physregs");
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+          DL, TII.get(TargetOpcode::COPY), ResultReg)
+    .addReg(Op0, getKillRegState(Op0IsKill), Idx);
   return ResultReg;
 }
 
@@ -1154,14 +1214,14 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
   const TerminatorInst *TI = LLVMBB->getTerminator();
 
   SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
-  unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size();
+  unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
 
   // Check successor nodes' PHI nodes that expect a constant to be available
   // from this block.
   for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
     const BasicBlock *SuccBB = TI->getSuccessor(succ);
     if (!isa<PHINode>(SuccBB->begin())) continue;
-    MachineBasicBlock *SuccMBB = MBBMap[SuccBB];
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
 
     // If this terminator has multiple identical successors (common for
     // switches), only handle each succ once.
@@ -1182,7 +1242,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       // by bailing out early, we may leave behind some dead instructions,
       // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
      // own moves. Second, this check is necessary because FastISel doesn't
-      // use CreateRegForValue to create registers, so it always creates
+      // use CreateRegs to create registers, so it always creates
       // exactly one register for each non-void instruction.
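
// Illustrative sketch (editorial aside, not part of the patch): the one
// shape every FastEmitInst_* overload above now shares. With an explicit
// def, the instruction defines ResultReg directly; otherwise the result
// lands in the first implicit-def physreg and a COPY moves it over.
struct ToyEmitter {
  unsigned NumExplicitDefs = 0;
  unsigned ImplicitDef0 = 0; // physreg implicitly defined by the instruction
  void emit(unsigned /*DefReg*/ = 0) { /* stand-in for BuildMI(...) */ }
  void copy(unsigned /*Dst*/, unsigned /*Src*/) { /* TargetOpcode::COPY */ }

  unsigned emitWithResult(unsigned ResultReg) {
    if (NumExplicitDefs >= 1) {
      emit(ResultReg);               // the II.getNumDefs() >= 1 case
    } else {
      emit();                        // result lands in ImplicitDef0
      copy(ResultReg, ImplicitDef0); // one COPY; no copyRegToReg failure path
    }
    return ResultReg;
  }
};
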
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { @@ -1190,7 +1250,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } } @@ -1205,10 +1265,10 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } - PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); DL = DebugLoc(); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 65c36c1..928e1ec 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" -#include "FunctionLoweringInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -30,7 +30,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" @@ -47,9 +46,11 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (isa<PHINode>(I)) return true; const BasicBlock *BB = I->getParent(); for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) return true; + } return false; } @@ -64,9 +65,11 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { const BasicBlock *Entry = A->getParent()->begin(); for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. + } return true; } @@ -74,12 +77,18 @@ FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } -void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, - bool EnableFastISel) { +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(Fn->getReturnType(), + Fn->getAttributes().getRetAttributes(), Outs, TLI); + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(), + Outs, Fn->getContext()); + // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. 
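
// Illustrative sketch (editorial aside, not part of the patch): the shared
// pattern of the two use-scanning predicates above, reduced to a toy IR. A
// PHI use counts as "outside" the defining block because it is really a use
// on the incoming edge.
#include <vector>

struct ToyUse { const void *UserBlock; bool UserIsPHI; };

bool usedOutsideOfDefiningBlock(const std::vector<ToyUse> &Uses,
                                const void *DefBlock) {
  for (const ToyUse &U : Uses)
    if (U.UserBlock != DefBlock || U.UserIsPHI)
      return true;
  return false;
}
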
for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); @@ -172,31 +181,33 @@ void FunctionLoweringInfo::clear() { #endif LiveOutRegInfo.clear(); ArgDbgValues.clear(); + RegFixups.clear(); } -unsigned FunctionLoweringInfo::MakeReg(EVT VT) { +/// CreateReg - Allocate a single virtual register for the given type. +unsigned FunctionLoweringInfo::CreateReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } -/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// CreateRegs - Allocate the appropriate number of virtual registers of /// the correctly promoted or expanded types. Assign these registers /// consecutive vreg numbers and return the first assigned number. /// /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { +unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, V->getType(), ValueVTs); + ComputeValueVTs(TLI, Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); + EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); + unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = MakeReg(RegisterVT); + unsigned R = CreateReg(RegisterVT); if (!FirstReg) FirstReg = R; } } @@ -208,7 +219,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); assert(CE->getOpcode() == Instruction::BitCast && isa<Function>(CE->getOperand(0)) && "Personality should be a function"); @@ -217,18 +228,18 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. std::vector<const GlobalVariable *> TyInfo; - unsigned N = I.getNumOperands(); + unsigned N = I.getNumArgOperands(); - for (unsigned i = N - 1; i > 2; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + for (unsigned i = N - 1; i > 1; --i) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; - assert (FirstCatch <= N && "Invalid filter length"); + assert(FirstCatch <= N && "Invalid filter length"); if (FirstCatch < N) { TyInfo.reserve(N - FirstCatch); for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -240,7 +251,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Filter. 
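
// Illustrative sketch (editorial aside, not part of the patch): the vreg
// arithmetic inside CreateRegs above, with the type already flattened to a
// per-member count of legal registers. Registers come out consecutively and
// the first number identifies the group.
#include <vector>

unsigned createRegs(const std::vector<unsigned> &NumRegsPerValue,
                    unsigned &NextVReg) {
  unsigned FirstReg = 0;
  for (unsigned NumRegs : NumRegsPerValue) // one entry per member/element
    for (unsigned i = 0; i != NumRegs; ++i) {
      unsigned R = NextVReg++;
      if (!FirstReg) FirstReg = R;
    }
  return FirstReg;
}
// e.g. a {i64, i32} struct on a 32-bit target flattens to {2, 1}: three
// consecutive vregs are allocated and the first one is returned.
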
TyInfo.reserve(FilterLength - 1); for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addFilterTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -249,10 +260,10 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } - if (N > 3) { - TyInfo.reserve(N - 3); - for (unsigned j = 3; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + if (N > 2) { + TyInfo.reserve(N - 2); + for (unsigned j = 2; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h deleted file mode 100644 index 4067a5b..0000000 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h +++ /dev/null @@ -1,144 +0,0 @@ -//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements routines for translating functions from LLVM IR into -// Machine IR. -// -//===----------------------------------------------------------------------===// - -#ifndef FUNCTIONLOWERINGINFO_H -#define FUNCTIONLOWERINGINFO_H - -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#ifndef NDEBUG -#include "llvm/ADT/SmallSet.h" -#endif -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/Support/CallSite.h" -#include <vector> - -namespace llvm { - -class AllocaInst; -class BasicBlock; -class CallInst; -class Function; -class GlobalVariable; -class Instruction; -class MachineInstr; -class MachineBasicBlock; -class MachineFunction; -class MachineModuleInfo; -class MachineRegisterInfo; -class TargetLowering; -class Value; - -//===--------------------------------------------------------------------===// -/// FunctionLoweringInfo - This contains information that is global to a -/// function that is used when lowering a region of the function. -/// -class FunctionLoweringInfo { -public: - const TargetLowering &TLI; - const Function *Fn; - MachineFunction *MF; - MachineRegisterInfo *RegInfo; - - /// CanLowerReturn - true iff the function's return value can be lowered to - /// registers. - bool CanLowerReturn; - - /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg - /// allocated to hold a pointer to the hidden sret parameter. - unsigned DemoteRegister; - - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. - DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; - - /// ValueMap - Since we emit code for the function a basic block at a time, - /// we must remember which virtual registers hold the values for - /// cross-basic-block values. - DenseMap<const Value*, unsigned> ValueMap; - - /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in - /// the entry block. This allows the allocas to be efficiently referenced - /// anywhere in the function. - DenseMap<const AllocaInst*, int> StaticAllocaMap; - - /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for - /// function arguments that are inserted after scheduling is completed. 
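
// Illustrative sketch (editorial aside, not part of the patch): the indexing
// shift behind the getOperand -> getArgOperand changes here and in the
// intrinsic hunks earlier. Once the callee is no longer counted among a
// call's argument operands, every argument index drops by one; a toy model:
#include <cassert>
#include <vector>

struct ToyCall {
  const void *Callee;
  std::vector<int> Args;
  // Old-style view: the callee occupied slot 0, arguments started at 1.
  int getOperand(unsigned i) const { assert(i >= 1); return Args[i - 1]; }
  // New-style view: arguments only, starting at 0.
  int getArgOperand(unsigned i) const { return Args[i]; }
};
// Hence getOperand(2) in the old AddCatchInfo becomes getArgOperand(1), and
// the loop bounds shift from "> 2" and "N > 3" to "> 1" and "N > 2".
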
- SmallVector<MachineInstr*, 8> ArgDbgValues; - -#ifndef NDEBUG - SmallSet<const Instruction *, 8> CatchInfoLost; - SmallSet<const Instruction *, 8> CatchInfoFound; -#endif - - struct LiveOutInfo { - unsigned NumSignBits; - APInt KnownOne, KnownZero; - LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} - }; - - /// LiveOutRegInfo - Information about live out vregs, indexed by their - /// register number offset by 'FirstVirtualRegister'. - std::vector<LiveOutInfo> LiveOutRegInfo; - - /// PHINodesToUpdate - A list of phi instructions whose operand list will - /// be updated after processing the current basic block. - /// TODO: This isn't per-function state, it's per-basic-block state. But - /// there's no other convenient place for it to live right now. - std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; - - explicit FunctionLoweringInfo(const TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel); - - /// clear - Clear out all the function-specific state. This returns this - /// FunctionLoweringInfo to an empty state, ready to be used for a - /// different function. - void clear(); - - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. - bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } -}; - -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. -void AddCatchInfo(const CallInst &I, - MachineModuleInfo *MMI, MachineBasicBlock *MBB); - -/// CopyCatchInfo - Copy catch information from DestBB to SrcBB. -void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); - -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 16eb8a7..61c2a90 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -123,7 +123,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; - SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. if (VRBase) { @@ -142,11 +142,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, } else { // Create the reg, emit the copy. 
VRBase = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, - DstRC, SrcRC, Node->getDebugLoc()); - - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); } SDValue Op(Node, ResNo); @@ -246,7 +243,7 @@ unsigned InstrEmitter::getVR(SDValue Op, const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); VReg = MRI->createVirtualRegister(RC); } - BuildMI(MBB, Op.getDebugLoc(), + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; } @@ -288,10 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, "Don't have operand info for this instruction!"); if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC, Op.getNode()->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; } } @@ -428,12 +423,9 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - // Create the extract_subreg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::EXTRACT_SUBREG)); - // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(VReg); @@ -450,11 +442,16 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(SRC); } - // Add def, source, and subreg index - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + + // Add source, and subreg index AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - MI->addOperand(MachineOperand::CreateImm(SubIdx)); + assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && + "Cannot yet extract from physregs"); + MI->getOperand(1).setSubReg(SubIdx); MBB->insert(InsertPos, MI); } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { @@ -511,18 +508,13 @@ void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + // Create the new VReg in the destination class and emit a copy. unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); - - // Create the new VReg in the destination class and emit a copy. 
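
// Illustrative sketch (editorial aside, not part of the patch): the operand
// encoding behind "EXTRACT_SUBREG is lowered as %dst = COPY %src:sub" above.
// The subregister index rides on the source operand of a generic COPY
// instead of being a separate immediate on a dedicated opcode.
struct RegOperand {
  unsigned Reg;    // virtual register number
  unsigned SubIdx; // 0 = whole register, otherwise a subregister index
};

struct CopyInstr { RegOperand Def, Src; };

CopyInstr makeExtractSubreg(unsigned Dst, unsigned Src, unsigned SubIdx) {
  // "%Dst = COPY %Src:SubIdx" -- the form the patch now emits.
  return CopyInstr{ {Dst, 0}, {Src, SubIdx} };
}
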
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
-  bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
-                                   DstRC, SrcRC, Node->getDebugLoc());
-  assert(Emitted &&
-         "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
-  (void) Emitted;
+  BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+          NewVReg).addReg(VReg);
 
   SDValue Op(Node, 0);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
@@ -604,9 +596,10 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
   } else if (SD->getKind() == SDDbgValue::CONST) {
     const Value *V = SD->getConst();
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-      // FIXME: SDDbgValues aren't updated with legalization, so it's possible
-      // to have i128 values in them at this point. As a crude workaround, just
-      // drop the debug info if this happens.
+      // FIXME: SDDbgValue constants aren't updated with legalization, so it's
+      // possible to have i128 constants in them at this point. Dwarf writer
+      // does not handle i128 constants at the moment so, as a crude workaround,
+      // just drop the debug info if this happens.
       if (!CI->getValue().isSignedIntN(64))
         MIB.addReg(0U);
       else
@@ -676,6 +669,33 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
 
   // Create the new machine instruction.
   MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+  // The MachineInstr constructor adds implicit-def operands. Scan through
+  // these to determine which are dead.
+  if (MI->getNumOperands() != 0 &&
+      Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+    // First, collect all used registers.
+    SmallVector<unsigned, 8> UsedRegs;
+    for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+      if (F->getOpcode() == ISD::CopyFromReg)
+        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+      else {
+        // Collect declared implicit uses.
+        const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
+        UsedRegs.append(TID.getImplicitUses(),
+                        TID.getImplicitUses() + TID.getNumImplicitUses());
+        // In addition to declared implicit uses, we must also check for
+        // direct RegisterSDNode operands.
+        for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+          if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+            unsigned Reg = R->getReg();
+            if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+              UsedRegs.push_back(Reg);
+          }
+      }
+    // Then mark unused registers as dead.
+    MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+  }
 
   // Add result register values for things that are defined by this
   // instruction.
@@ -696,16 +716,24 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
     MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                    cast<MachineSDNode>(Node)->memoperands_end());
 
+  // Insert the instruction into position in the block. This needs to
+  // happen before any custom inserter hook is called so that the
+  // hook knows where in the block to insert the replacement code.
+  MBB->insert(InsertPos, MI);
+
   if (II.usesCustomInsertionHook()) {
     // Insert this instruction into the basic block using a target
     // specific inserter which may return a new basic block.
-    MBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
-    InsertPos = MBB->end();
+    bool AtEnd = InsertPos == MBB->end();
+    MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
+    if (NewMBB != MBB) {
+      if (AtEnd)
+        InsertPos = NewMBB->end();
+      MBB = NewMBB;
+    }
     return;
   }
 
-  MBB->insert(InsertPos, MI);
-
   // Additional results must be a physical register def.
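
// Illustrative sketch (editorial aside, not part of the patch): the dead-def
// marking added to EmitMachineNode above is set subtraction -- implicit
// physreg defs that no flagged user reads become dead. Reduced to plain
// integers (the real setPhysRegsDeadExcept also accounts for register
// aliases via TRI):
#include <algorithm>
#include <vector>

std::vector<unsigned> deadImplicitDefs(const std::vector<unsigned> &ImplicitDefs,
                                       std::vector<unsigned> UsedRegs) {
  std::sort(UsedRegs.begin(), UsedRegs.end());
  std::vector<unsigned> Dead;
  for (unsigned Reg : ImplicitDefs)
    if (!std::binary_search(UsedRegs.begin(), UsedRegs.end(), Reg))
      Dead.push_back(Reg); // no reader anywhere in the flag chain
  return Dead;
}
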
if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { @@ -761,24 +789,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; - - const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; - // Get the register classes of the src/dst. - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - SrcTRC = MRI->getRegClass(SrcReg); - else - SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - DstTRC = MRI->getRegClass(DestReg); - else - DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, - Node->getOperand(1).getValueType()); - - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg, - DstTRC, SrcTRC, Node->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); break; } case ISD::CopyFromReg: { @@ -807,6 +820,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); + // Add the isAlignStack bit. + int64_t isAlignStack = + cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + getZExtValue(); + MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -821,14 +840,22 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true)); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls. + MI->addOperand(MachineOperand::CreateReg(Reg, true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, - false, false, true)); + MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), + /*isKill=*/ false, + /*isDead=*/ false, + /*isUndef=*/false, + /*isEarlyClobber=*/ true)); } break; case InlineAsm::Kind_RegUse: // Use of register. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62a37a5..7a47da4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -31,6 +31,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" @@ -133,7 +134,7 @@ private: /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
   SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
-                                     SDValue N1, SDValue N2, 
+                                     SDValue N1, SDValue N2,
                                      SmallVectorImpl<int> &Mask) const;
 
   bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
@@ -143,6 +144,8 @@ private:
                     DebugLoc dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+  std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+                                                 SDNode *Node, bool isSigned);
   SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
                           RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
                           RTLIB::Libcall Call_PPCF128);
@@ -172,6 +175,8 @@ private:
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
   SDValue ExpandVectorBuildThroughStack(SDNode* Node);
 
+  std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
   void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 };
@@ -181,8 +186,8 @@ private:
 /// performs the same shuffle in terms of order of result bytes, but on a type
 /// whose vector element type is narrower than the original shuffle type.
 /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
-SDValue 
-SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, 
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
                                                  SDValue N1, SDValue N2,
                                              SmallVectorImpl<int> &Mask) const {
   unsigned NumMaskElts = VT.getVectorNumElements();
@@ -193,12 +198,12 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
 
   if (NumEltsGrowth == 1)
     return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
-  
+
   SmallVector<int, 8> NewMask;
   for (unsigned i = 0; i != NumMaskElts; ++i) {
     int Idx = Mask[i];
     for (unsigned j = 0; j != NumEltsGrowth; ++j) {
-      if (Idx < 0) 
+      if (Idx < 0)
        NewMask.push_back(-1);
       else
         NewMask.push_back(Idx * NumEltsGrowth + j);
@@ -320,7 +325,8 @@ bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
   bool OperandsLeadToDest = false;
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
     OperandsLeadToDest |=     // If an operand leads to Dest, so do we.
-      LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo);
+      LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+                                   NodesLeadingTo);
 
   if (OperandsLeadToDest) {
     NodesLeadingTo.insert(N);
@@ -357,7 +363,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
     EVT SVT = VT;
     while (SVT != MVT::f32) {
       SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
-      if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+      if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
           // Only do this if the target has a native EXTLOAD instruction from
           // smaller type.
           TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
@@ -372,8 +378,8 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
   SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   if (Extend)
-    return DAG.getExtLoad(ISD::EXTLOAD, dl,
-                          OrigVT, DAG.getEntryNode(),
+    return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+                          DAG.getEntryNode(),
                           CPIdx, PseudoSourceValue::getConstantPool(),
                           0, VT, false, false, Alignment);
   return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
@@ -450,7 +456,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
                                  8 * (StoredBytes - Offset));
 
         // Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, @@ -552,7 +558,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // The last copy may be partial. Do an extending load. EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), LD->isNonTemporal(), @@ -568,7 +574,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, Stores.size()); // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. @@ -597,23 +603,23 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; if (TLI.isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } else { - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } // aggregate the two parts @@ -773,7 +779,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((isTypeLegal(Node->getOperand(i).getValueType()) || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -853,6 +859,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. 
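
// Illustrative sketch (editorial aside, not part of the patch): the two-part
// load scheme above in plain C++ terms. Both halves are loaded with natural
// alignment and reassembled, so no single misaligned access is issued.
// Little-endian variant, 32-bit value at a 2-byte-aligned address:
#include <cstdint>
#include <cstring>

uint32_t loadUnaligned32LE(const uint16_t *P) {
  uint16_t Lo, Hi;
  std::memcpy(&Lo, P, sizeof Lo);     // low half, like the ZEXTLOAD
  std::memcpy(&Hi, P + 1, sizeof Hi); // high half at Ptr + IncrementSize
  return (uint32_t)Lo | ((uint32_t)Hi << 16);
}
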
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -925,8 +933,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } - Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), - Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), + Ops.size()), 0); switch (Action) { case TargetLowering::Legal: for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -1000,11 +1008,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { NodesLeadingTo); } - // Now that we legalized all of the inputs (which may have inserted - // libcalls) create the new CALLSEQ_START node. + // Now that we have legalized all of the inputs (which may have inserted + // libcalls), create the new CALLSEQ_START node. Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Merge in the last call, to ensure that this call start after the last + // Merge in the last call to ensure that this call starts after the last // call ended. if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -1016,7 +1024,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], + Ops.size()), Result.getResNo()); } // Remember that the CALLSEQ_START is legalized. @@ -1058,7 +1067,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } else { Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); @@ -1067,7 +1078,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; Ops.back() = Tmp2; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); @@ -1087,7 +1100,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp3 = Result.getValue(0); Tmp4 = Result.getValue(1); @@ -1100,7 +1115,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, TLI); Tmp3 = Result.getOperand(0); Tmp4 = Result.getOperand(1); @@ -1166,7 +1181,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, NVT, isVolatile, isNonTemporal, Alignment); @@ -1202,8 +1217,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (TLI.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1211,13 +1226,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1231,7 +1246,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1239,14 +1254,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1267,7 +1282,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp1 = Result.getValue(0); Tmp2 = Result.getValue(1); @@ -1281,10 +1298,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned load and the target doesn't support it, // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, TLI); Tmp1 = Result.getOperand(0); Tmp2 = Result.getOperand(1); @@ -1310,10 +1329,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } - assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!"); + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, LD->isVolatile(), LD->isNonTemporal(), @@ -1355,8 +1375,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { { Tmp3 = LegalizeOp(ST->getValue()); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1366,7 +1388,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, TLI); @@ -1459,8 +1481,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1469,7 +1493,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, TLI); @@ -1531,7 +1555,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, false, false, 0); else - return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, NULL, 0, Vec.getValueType().getVectorElementType(), false, false, 0); } @@ -1568,7 +1592,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { Node->getOperand(i), Idx, SV, Offset, EltVT, false, false, 0)); } else - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, SV, Offset, false, false, 0)); } @@ -1763,7 +1787,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, + return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, false, false, DestAlign); } @@ -1926,6 +1950,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. +std::pair<SDValue, SDValue> +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
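
// Illustrative sketch (editorial aside, not part of the patch; toy stand-ins
// throughout): the argument marshalling inside ExpandChainLibCall above.
// Operand 0 is peeled off as the in-chain; every remaining operand becomes
// one libcall argument whose extension flags derive from isSigned.
#include <vector>

struct ToyOperand { int Id; };
struct ToyArg { ToyOperand Node; bool SExt, ZExt; };

std::vector<ToyArg> buildChainCallArgs(const std::vector<ToyOperand> &Ops,
                                       bool IsSigned) {
  std::vector<ToyArg> Args;
  for (size_t i = 1; i < Ops.size(); ++i) // Ops[0] is the in-chain
    Args.push_back(ToyArg{Ops[i], IsSigned, !IsSigned});
  return Args;
}
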
+ LegalizeOp(CallInfo.second); + return CallInfo; +} + SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, @@ -2048,7 +2110,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, + TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } @@ -2058,11 +2121,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { EVT SHVT = TLI.getShiftAmountTy(); - SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, DAG.getConstant(UINT64_C(0x800), MVT::i64)); - SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); @@ -2122,7 +2185,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, Alignment); else { FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, Alignment)); @@ -2332,6 +2395,92 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } +std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = 
RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + void SelectionDAGLegalize::ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results) { DebugLoc dl = Node->getDebugLoc(); @@ -2357,10 +2506,48 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EH_RETURN: case ISD::EH_LABEL: case ISD::PREFETCH: - case ISD::MEMBARRIER: case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; + case ISD::MEMBARRIER: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + // FIXME: Unimplemented for now. Add libcalls. 
+ case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -2465,15 +2652,31 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, - false, false, 0); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(Align - 1, + TLI.getPointerTy())); + + VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(-Align, + TLI.getPointerTy())); + } + // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, @@ -2496,7 +2699,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2948,13 +3151,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const TargetData &TD = *TLI.getTargetData(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, PseudoSourceValue::getJumpTable(), 0, MemVT, false, false, 0); Addr = LD; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e3eb949..650ee5a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -453,8 +453,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { - NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), - NVT, L->getChain(), L->getBasePtr(), L->getOffset(), + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to @@ -464,8 +464,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { } // Do a non-extending load followed by FP_EXTEND. - NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD, - L->getMemoryVT(), L->getChain(), + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), L->getMemoryVT(), L->isVolatile(), @@ -504,7 +504,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue NewVAARG; - NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -698,9 +699,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { @@ -739,9 +741,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { @@ -757,8 +760,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -1106,7 +1110,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); - Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -1294,9 +1298,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { @@ -1375,9 +1379,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { @@ -1393,8 +1397,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8b382bc..b94ea9a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -369,7 +369,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); - SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); @@ -572,7 +572,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SmallVector<SDValue, 8> Parts(NumRegs); for (unsigned i = 0; i < NumRegs; ++i) { - Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2)); + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); Chain = Parts[i].getValue(1); } @@ -725,8 +726,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always // legal types. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), LHS, RHS, N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { @@ -737,8 +739,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond, - N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, + N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { @@ -773,7 +775,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) NewOps.push_back(GetPromotedInteger(N->getOperand(i))); - return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts); + return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { @@ -798,17 +800,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(N->getOperand(1).getValueType().getSizeInBits() >= N->getValueType(0).getVectorElementType().getSizeInBits() && "Type of inserted value narrower than vector element type!"); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), GetPromotedInteger(N->getOperand(1)), - N->getOperand(2)); + N->getOperand(2)), + 0); } assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), Idx); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { @@ -819,15 +822,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { SDValue Flag = GetPromotedInteger(N->getOperand(i)); NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); } - return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps, - array_lengthof(NewOps)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), - GetPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { @@ -837,8 +839,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); - return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, - N->getOperand(1), N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, Cond, + N->getOperand(1), N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { @@ -849,8 +851,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); // The CC (#4) and the possible return values (#2 and #3) have legal types. - return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2), - N->getOperand(3), N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { @@ -861,12 +863,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); // The CC (#2) is always legal. - return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - ZExtPromotedInteger(N->getOperand(1))); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { @@ -878,8 +880,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - SExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -905,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - ZExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { @@ -990,6 +992,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
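[Editorial aside] The SADDO/SSUBO and UADDO/USUBO cases added to ExpandIntegerResult above dispatch to the new ExpandIntRes_SADDSUBO and ExpandIntRes_UADDSUBO helpers, whose bodies appear in the hunks below. The signed variant derives the overflow bit purely from operand and result signs. Here is a minimal standalone sketch of that sign rule; it is not part of the patch, and saddOverflows is a hypothetical helper rather than an LLVM API:

#include <cassert>
#include <cstdint>

// Mirrors the SETCC/AND predicate emitted by ExpandIntRes_SADDSUBO below:
// for addition, overflow occurs iff the operands have the same sign and the
// (wrapped) sum has a different sign.
static bool saddOverflows(int32_t A, int32_t B) {
  // Perform the add with wraparound semantics, as the expanded node does.
  // (Converting 0x80000000u back to int32_t assumes two's complement.)
  int32_t Sum = (int32_t)((uint32_t)A + (uint32_t)B);
  bool ASign = A >= 0, BSign = B >= 0, SumSign = Sum >= 0;
  return ASign == BSign && ASign != SumSign;
}

int main() {
  assert(saddOverflows(INT32_MAX, 1));  // 0x7fffffff + 1 wraps to negative
  assert(!saddOverflows(-1, 1));        // differing signs can never overflow
  return 0;
}

The expanded DAG encodes exactly this predicate over the split value, using getSetCC for the three sign tests and an AND node to combine them.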
@@ -1526,7 +1533,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. @@ -1559,7 +1566,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1577,7 +1584,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. - Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), isVolatile, isNonTemporal, Alignment); @@ -1586,7 +1593,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); // Load the rest of the low bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, @@ -1716,6 +1723,48 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. 
+ ReplaceValueWith(SDValue(Node, 1), Cmp); +} + void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -1912,6 +1961,29 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2154,9 +2226,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { @@ -2172,9 +2244,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { @@ -2190,8 +2262,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { @@ -2200,7 +2272,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // upper half of the shift amount is zero. Just use the lower half. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(1), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { @@ -2209,7 +2281,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { // constant to valid type. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(0), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { @@ -2384,7 +2456,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? 
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 17f131b..6e56c98 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -485,15 +485,14 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); + NewOps.append(N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } // Some operands changed - update the node. if (!NewOps.empty()) { - SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], - NewOps.size()).getNode(); + SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); if (M != N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can @@ -684,40 +683,45 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // can potentially cause recursive merging. SmallSetVector<SDNode*, 16> NodesToAnalyze; NodeUpdateListener NUL(*this, NodesToAnalyze); - DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); - - // The old node may still be present in a map like ExpandedIntegers or - // PromotedIntegers. Inform maps about the replacement. - ReplacedValues[From] = To; - - // Process the list of nodes that need to be reanalyzed. - while (!NodesToAnalyze.empty()) { - SDNode *N = NodesToAnalyze.back(); - NodesToAnalyze.pop_back(); - if (N->getNodeId() != DAGTypeLegalizer::NewNode) - // The node was analyzed while reanalyzing an earlier node - it is safe to - // skip. Note that this is not a morphing node - otherwise it would still - // be marked NewNode. - continue; + do { + DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe + // to skip. Note that this is not a morphing node - otherwise it would + // still be marked NewNode. + continue; - // Analyze the node's operands and recalculate the node ID. - SDNode *M = AnalyzeNewNode(N); - if (M != N) { - // The node morphed into a different node. Make everyone use the new node - // instead. - assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); - assert(N->getNumValues() == M->getNumValues() && - "Node morphing changed the number of results!"); - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - SDValue OldVal(N, i); - SDValue NewVal(M, i); - if (M->getNodeId() == Processed) - RemapValue(NewVal); - DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new + // node instead. 
+        assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); +        assert(N->getNumValues() == M->getNumValues() && +               "Node morphing changed the number of results!"); +        for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { +          SDValue OldVal(N, i); +          SDValue NewVal(M, i); +          if (M->getNodeId() == Processed) +            RemapValue(NewVal); +          DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); +        } +        // The original node continues to exist in the DAG, marked NewNode. } - } + // When recursively updating nodes with new nodes, it is possible to have + // new uses of From due to CSE. If this happens, replace the new uses of + // From with To. + } while (!From.use_empty()); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c665963..bd86694 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -345,6 +345,9 @@ private: void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -620,6 +623,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 88e1e62..9c2b1d9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -238,13 +238,15 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); + const unsigned Align = N->getConstantOperandVal(3); - Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); - Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2)); + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. 
if (TLI.isBigEndian()) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 0e2bd02..621c087 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -116,7 +116,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Ops.push_back(LegalizeOp(Node->getOperand(i))); SDValue Result = - DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size()); + SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7efeea1..93aeff5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -165,9 +165,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); - SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(), + SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), N->getValueType(0).getVectorElementType(), + N->getDebugLoc(), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), @@ -448,6 +449,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -755,14 +761,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); - Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; - Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the @@ -1082,10 +1088,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { uint64_t LoElts = Lo.getValueType().getVectorNumElements(); if (IdxVal < LoElts) - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); - return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, DAG.getConstant(IdxVal - LoElts, - Idx.getValueType())); + Idx.getValueType())), + 0); } // Store the vector to the stack. @@ -1099,7 +1106,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. 
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, SV, 0, EltVT, false, false, 0); } @@ -1199,7 +1206,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FPOWI: case ISD::FREM: case ISD::FSUB: case ISD::MUL: @@ -1215,6 +1221,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -1241,6 +1251,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: case ISD::FSIN: case ISD::FSQRT: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: Res = WidenVecRes_Unary(N); break; } @@ -1258,7 +1273,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeLegal(VT) && NumElts != 1) { + while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } @@ -1273,13 +1288,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } else { // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); SmallVector<SDValue, 16> ConcatOps(CurNumElts); unsigned ConcatEnd = 0; // Current ConcatOps index. - unsigned Idx = 0; // Current Idx into input vectors. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest synthesizable vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, @@ -1290,26 +1312,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { Idx += NumElts; CurNumElts -= NumElts; } - EVT PrevVecVT = VT; do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeLegal(VT) && NumElts != 1); + } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); if (NumElts == 1) { - // Since we are using concat vector, build a vector from the scalar ops. 
-        SDValue VecOp = DAG.getUNDEF(PrevVecVT); for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getIntPtrConstant(Idx)); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getIntPtrConstant(Idx)); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, - DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), - DAG.getIntPtrConstant(i)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); } CurNumElts = 0; - ConcatOps[ConcatEnd++] = VecOp; } } @@ -1320,23 +1337,65 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return ConcatOps[0]; } - // Rebuild vector to one with the widen type - Idx = ConcatEnd - 1; - while (Idx != 0) { + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; VT = ConcatOps[Idx--].getValueType(); - while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) - --Idx; - if (Idx != 0) { - VT = ConcatOps[Idx].getValueType(); - ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeSynthesizable(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + } + ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; + } + else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, &SubConcatOps[0], + OpsToConcat); + ConcatEnd = SubConcatIdx + 1; } } + + // Check to see if we have a single operation with the widen type. 
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } - unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = + WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(VT); + SDValue UndefVal = DAG.getUNDEF(MaxVT); for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } @@ -1366,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, dl, WidenVT, InOp); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1410,6 +1469,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); } +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -1501,7 +1567,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } - if (TLI.isTypeLegal(NewInVT)) { + if (TLI.isTypeSynthesizable(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1642,7 +1708,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SatOp, CvtCode); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1968,7 +2034,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { if (InWidenSize % Size == 0 && !VT.isVector()) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeLegal(NewVT)) { + if (TLI.isTypeSynthesizable(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getIntPtrConstant(0)); @@ -2066,7 +2132,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2080,7 +2146,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned 
MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2286,14 +2352,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, SVOffset + Offset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index ad8630a..3b86c32 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -535,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 820ba66..3ef521c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); @@ -795,7 +795,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy @@ -1116,7 +1116,7 @@ namespace { SUnit *pop() { if (empty()) return NULL; std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; @@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return left->getHeight() > right->getHeight(); } else if 
(RStall) return false; + + // If either node is scheduling for latency, sort them by height and latency + // first. + if (left->SchedulingPref == Sched::Latency || + right->SchedulingPref == Sched::Latency) { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + if (left->Latency != right->Latency) + return left->Latency > right->Latency; + } + return BURRSort(left, right, SPQ); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 3185c88..06cf053 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - SU->SchedulingPref = TLI.getSchedulingPreference(N); + if (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); return SU; } @@ -97,7 +101,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); Cost = RC->getCopyCost(); } } @@ -106,17 +110,42 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, SelectionDAG *DAG) { SmallVector<EVT, 4> VTs; - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - VTs.push_back(N->getValueType(i)); + SDNode *FlagDestNode = Flag.getNode(); + + // Don't add a flag from a node to itself. + if (FlagDestNode == N) return; + + // Don't add a flag to something which already has a flag. + if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; + + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + VTs.push_back(N->getValueType(I)); + + if (AddFlag) VTs.push_back(MVT::Flag); + SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Ops.push_back(N->getOperand(i)); - if (Flag.getNode()) + for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) + Ops.push_back(N->getOperand(I)); + + if (FlagDestNode) Ops.push_back(Flag); + SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); + MachineSDNode::mmo_iterator Begin = 0, End = 0; + MachineSDNode *MN = dyn_cast<MachineSDNode>(N); + + // Store memory references. + if (MN) { + Begin = MN->memoperands_begin(); + End = MN->memoperands_end(); + } + DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + + // Reset the memory references. + if (MN) + MN->setMemRefs(Begin, End); } /// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. @@ -124,98 +153,98 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, /// offsets are not far apart (target specific), it adds MVT::Flag inputs and /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. 
-void ScheduleDAGSDNodes::ClusterNeighboringLoads() { +void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { + SDNode *Chain = 0; + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) + Chain = Node->getOperand(NumOps-1).getNode(); + if (!Chain) + return; + + // Look for other loads of the same chain. Find loads that are loading from + // the same base pointer and different offsets. SmallPtrSet<SDNode*, 16> Visited; SmallVector<int64_t, 4> Offsets; DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - SDNode *Node = &*NI; - if (!Node || !Node->isMachineOpcode()) + bool Cluster = false; + SDNode *Base = Node; + for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); + I != E; ++I) { + SDNode *User = *I; + if (User == Node || !Visited.insert(User)) continue; - - unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (!TID.mayLoad()) + int64_t Offset1, Offset2; + if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || + Offset1 == Offset2) + // FIXME: Should be ok if the addresses are identical. But earlier + // optimizations really should have eliminated one of the loads. continue; + if (O2SMap.insert(std::make_pair(Offset1, Base)).second) + Offsets.push_back(Offset1); + O2SMap.insert(std::make_pair(Offset2, User)); + Offsets.push_back(Offset2); + if (Offset2 < Offset1) + Base = User; + Cluster = true; + } - SDNode *Chain = 0; - unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) - Chain = Node->getOperand(NumOps-1).getNode(); - if (!Chain) - continue; + if (!Cluster) + return; - // Look for other loads of the same chain. Find loads that are loading from - // the same base pointer and different offsets. - Visited.clear(); - Offsets.clear(); - O2SMap.clear(); - bool Cluster = false; - SDNode *Base = Node; - int64_t BaseOffset; - for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); - I != E; ++I) { - SDNode *User = *I; - if (User == Node || !Visited.insert(User)) - continue; - int64_t Offset1, Offset2; - if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || - Offset1 == Offset2) - // FIXME: Should be ok if the addresses are identical. But earlier - // optimizations really should have eliminated one of the loads. - continue; - if (O2SMap.insert(std::make_pair(Offset1, Base)).second) - Offsets.push_back(Offset1); - O2SMap.insert(std::make_pair(Offset2, User)); - Offsets.push_back(Offset2); - if (Offset2 < Offset1) { - Base = User; - BaseOffset = Offset2; - } else { - BaseOffset = Offset1; - } - Cluster = true; - } + // Sort them in increasing order. + std::sort(Offsets.begin(), Offsets.end()); + + // Check if the loads are close enough. + SmallVector<SDNode*, 4> Loads; + unsigned NumLoads = 0; + int64_t BaseOff = Offsets[0]; + SDNode *BaseLoad = O2SMap[BaseOff]; + Loads.push_back(BaseLoad); + for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { + int64_t Offset = Offsets[i]; + SDNode *Load = O2SMap[Offset]; + if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads)) + break; // Stop right here. Ignore loads that are further away. + Loads.push_back(Load); + ++NumLoads; + } - if (!Cluster) - continue; + if (NumLoads == 0) + return; - // Sort them in increasing order. 
-    std::sort(Offsets.begin(), Offsets.end()); - - // Check if the loads are close enough. - SmallVector<SDNode*, 4> Loads; - unsigned NumLoads = 0; - int64_t BaseOff = Offsets[0]; - SDNode *BaseLoad = O2SMap[BaseOff]; - Loads.push_back(BaseLoad); - for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { - int64_t Offset = Offsets[i]; - SDNode *Load = O2SMap[Offset]; - if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, - NumLoads)) - break; // Stop right here. Ignore loads that are further away. - Loads.push_back(Load); - ++NumLoads; - } + // Cluster loads by adding MVT::Flag outputs and inputs. This also + // ensures they are scheduled in order of increasing addresses. + SDNode *Lead = Loads[0]; + AddFlags(Lead, SDValue(0, 0), true, DAG); + + SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1); + for (unsigned I = 1, E = Loads.size(); I != E; ++I) { + bool OutFlag = I < E - 1; + SDNode *Load = Loads[I]; + + AddFlags(Load, InFlag, OutFlag, DAG); + + if (OutFlag) + InFlag = SDValue(Load, Load->getNumValues() - 1); + + ++LoadsClustered; + } +} - if (NumLoads == 0) +/// ClusterNodes - Cluster certain nodes which should be scheduled together. +/// +void ScheduleDAGSDNodes::ClusterNodes() { + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + SDNode *Node = &*NI; + if (!Node || !Node->isMachineOpcode()) continue; - // Cluster loads by adding MVT::Flag outputs and inputs. This also - // ensures they are scheduled in order of increasing addresses. - SDNode *Lead = Loads[0]; - AddFlags(Lead, SDValue(0,0), true, DAG); - SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1); - for (unsigned i = 1, e = Loads.size(); i != e; ++i) { - bool OutFlag = i < e-1; - SDNode *Load = Loads[i]; - AddFlags(Load, InFlag, OutFlag, DAG); - if (OutFlag) - InFlag = SDValue(Load, Load->getNumValues()-1); - ++LoadsClustered; - } + unsigned Opc = Node->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + if (TID.mayLoad()) + // Cluster loads from "near" addresses into combined SUnits. + ClusterNeighboringLoads(Node); } } @@ -364,8 +393,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (Cost >= 0) PhysReg = 0; - const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpSU->Latency, PhysReg); + // If this is a ctrl dep, latency is 1. + unsigned OpLatency = isChain ? 1 : OpSU->Latency; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpLatency, PhysReg); if (!isChain && !UnitLatencies) { ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); @@ -382,8 +413,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { - // Cluster loads from "near" addresses into combined SUnits. - ClusterNeighboringLoads(); + // Cluster certain nodes which should be scheduled together. + ClusterNodes(); // Populate the SUnits array. BuildSchedUnits(); // Compute all the scheduling dependencies between nodes. 
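[Editorial aside] Stripped of SDNode bookkeeping, the rewritten ClusterNeighboringLoads above reduces to: gather the offsets of loads hanging off one chain, sort them, then grow a cluster upward from the smallest offset while the target says the next load is still near. A runnable sketch of that shape, with loads reduced to plain offsets and an assumed fixed Cutoff standing in for the shouldScheduleLoadsNear target hook:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Offsets of loads found on one chain, in discovery order.
  std::vector<int64_t> Offsets = {24, 0, 8, 4096};

  // Sort them in increasing order, as the pass does.
  std::sort(Offsets.begin(), Offsets.end());

  // Grow the cluster from the lowest offset while the next load is "near".
  const int64_t Cutoff = 64; // hypothetical stand-in for the target query
  std::vector<int64_t> Cluster{Offsets[0]};
  for (size_t i = 1; i < Offsets.size(); ++i) {
    if (Offsets[i] - Offsets[0] > Cutoff)
      break; // stop right here; ignore loads that are further away
    Cluster.push_back(Offsets[i]);
  }

  for (int64_t O : Cluster)
    std::printf("clustered load at base+%lld\n", (long long)O);
  return 0;
}

In the real pass the surviving loads are then chained with MVT::Flag values via AddFlags, which is what forces the scheduler to emit them adjacently and in address order.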
@@ -427,15 +458,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Def->isMachineOpcode() && Use->isMachineOpcode()) { + if (Def->isMachineOpcode()) { const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); if (DefIdx >= II.getNumDefs()) return; int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); if (DefCycle < 0) return; - const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + int UseCycle = 1; + if (Use->isMachineOpcode()) { + const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); + UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + } if (UseCycle >= 0) { int Latency = DefCycle - UseCycle + 1; if (Latency >= 0) @@ -473,7 +507,7 @@ namespace { } // ProcessSourceNode - Process nodes with source order numbers. These are added -// to a vector which EmitSchedule use to determine how to insert dbg_value +// to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, @@ -485,13 +519,13 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, return; MachineBasicBlock *BB = Emitter.getBlock(); - if (BB->empty() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; } - Orders.push_back(std::make_pair(Order, &BB->back())); + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); if (!N->getHasDebugValue()) return; // Opportunistically insert immediate dbg_value uses, i.e. those with source @@ -530,7 +564,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); if (DbgMI) - BB->insert(BB->end(), DbgMI); + BB->insert(InsertPos, DbgMI); } } @@ -574,9 +608,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { - MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin(); - while (BBBegin != BB->end() && BBBegin->isPHI()) - ++BBBegin; + MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug // values. @@ -586,14 +618,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); // Now emit the rest according to source order. unsigned LastOrder = 0; - MachineInstr *LastMI = 0; for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; - MachineBasicBlock *MIBB = MI->getParent(); #ifndef NDEBUG unsigned LastDIOrder = 0; #endif @@ -612,13 +642,14 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. 
MachineBasicBlock::iterator Pos = MI; - MIBB->insert(llvm::next(Pos), DbgMI); + MI->getParent()->insert(llvm::next(Pos), DbgMI); } } } LastOrder = Order; - LastMI = MI; } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index e8714ba..842fc8c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -108,7 +108,10 @@ namespace llvm { private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. - void ClusterNeighboringLoads(); + void ClusterNeighboringLoads(SDNode *Node); + /// ClusterNodes - Cluster certain nodes which should be scheduled together. + /// + void ClusterNodes(); /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 38bf68b..e83a034 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -790,9 +790,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) +SelectionDAG::SelectionDAG(const TargetMachine &tm) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -808,7 +807,6 @@ void SelectionDAG::init(MachineFunction &mf) { SelectionDAG::~SelectionDAG() { allnodes_clear(); delete Ordering; - DbgInfo->clear(); delete DbgInfo; } @@ -835,11 +833,8 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - delete Ordering; - Ordering = new SDNodeOrdering(); + Ordering->clear(); DbgInfo->clear(); - delete DbgInfo; - DbgInfo = new SDDbgInfo(); } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -980,7 +975,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { } } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { @@ -1015,7 +1010,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT, + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2291,7 +2286,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned i) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); if (N->getMaskElt(i) < 0) return getUNDEF(VT.getVectorElementType()); unsigned Index = N->getMaskElt(i); @@ -2475,9 +2469,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); - if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + 
          OpOpcode == ISD::ANY_EXTEND)
       // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
       return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+
+    // (ext (trunc x)) -> x
+    if (OpOpcode == ISD::TRUNCATE) {
+      SDValue OpOp = Operand.getNode()->getOperand(0);
+      if (OpOp.getValueType() == VT)
+        return OpOp;
+    }
     break;
   case ISD::TRUNCATE:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
@@ -2622,7 +2625,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     if (N1.getOpcode() == ISD::BUILD_VECTOR &&
         N2.getOpcode() == ISD::BUILD_VECTOR) {
       SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
-      Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+      Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
       return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
     }
     break;
@@ -3011,7 +3014,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
                               SDValue N1, SDValue N2, SDValue N3) {
   // Perform various simplifications.
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
-  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   switch (Opcode) {
   case ISD::CONCAT_VECTORS:
     // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
@@ -3020,8 +3022,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
         N2.getOpcode() == ISD::BUILD_VECTOR &&
         N3.getOpcode() == ISD::BUILD_VECTOR) {
       SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
-      Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
-      Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+      Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+      Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
       return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
     }
     break;
@@ -3041,14 +3043,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     if (N2 == N3) return N2;   // select C, X, X -> X
     break;
-  case ISD::BRCOND:
-    if (N2C) {
-      if (N2C->getZExtValue()) // Unconditional branch
-        return getNode(ISD::BR, DL, MVT::Other, N1, N3);
-      else
-        return N1;         // Never-taken branch
-    }
-    break;
   case ISD::VECTOR_SHUFFLE:
     llvm_unreachable("should use getVectorShuffle constructor!");
     break;
@@ -3267,6 +3261,15 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
       if (VT.bitsGT(LVT))
         VT = LVT;
     }
+
+  // If we're optimizing for size, and there is a limit, bump the maximum number
+  // of operations inserted down to 4.  This is a wild guess that approximates
+  // the size of a call to memcpy or memset (3 arguments + call).
+  if (Limit != ~0U) {
+    const Function *F = DAG.getMachineFunction().getFunction();
+    if (F->hasFnAttr(Attribute::OptimizeForSize))
+      Limit = 4;
+  }
   unsigned NumMemOps = 0;
   while (Size != 0) {
@@ -3321,9 +3324,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   std::string Str;
   bool CopyFromStr = isMemSrcFromString(Src, Str);
   bool isZeroStr = CopyFromStr && Str.empty();
-  uint64_t Limit = -1ULL;
-  if (!AlwaysInline)
-    Limit = TLI.getMaxStoresPerMemcpy();
+  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
+
   if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                 (DstAlignCanChange ? 0 : Align),
                                 (isZeroStr ? 0 : SrcAlign),
@@ -3368,7 +3370,7 @@
   // FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); - Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcSV, SrcSVOff + SrcOff, VT, isVol, false, MinAlign(SrcAlign, SrcOff)); @@ -3401,9 +3403,6 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemmove(); bool DstAlignCanChange = false; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); @@ -3412,6 +3411,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), @@ -3895,8 +3895,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, @@ -3919,12 +3919,12 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); - return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO); + return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { if (VT == MemVT) { @@ -3974,18 +3974,18 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } @@ -3995,7 +3995,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); assert(LD->getOffset().getOpcode() == ISD::UNDEF && "Load is already a indexed load!"); - return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), + return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getSrcValue(), 
LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -4141,9 +4141,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - SDValue SV) { - SDValue Ops[] = { Chain, Ptr, SV }; - return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); + SDValue SV, + unsigned Align) { + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, @@ -4425,17 +4426,16 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { /// already exists. If the resultant node does not exist in the DAG, the /// input node is returned. As a degenerate case, if you specify the same /// input operands as the node already has, the input node is returned. -SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); // Check to see if there is no change. - if (Op == N->getOperand(0)) return InN; + if (Op == N->getOperand(0)) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4447,22 +4447,20 @@ SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); // Check to see if there is no change. if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) - return InN; // No operands changed, just return the input node. + return N; // No operands changed, just return the input node. // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4477,32 +4475,31 @@ UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { // If this gets put into a CSE map, add it. 
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { SDValue Ops[] = { Op1, Op2, Op3 }; return UpdateNodeOperands(N, Ops, 3); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4) { SDValue Ops[] = { Op1, Op2, Op3, Op4 }; return UpdateNodeOperands(N, Ops, 4); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4, SDValue Op5) { SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; return UpdateNodeOperands(N, Ops, 5); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); @@ -4516,12 +4513,12 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { } // No operands changed, just return the input node. - if (!AnyChange) return InN; + if (!AnyChange) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4535,7 +4532,7 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { // If this gets put into a CSE map, add it. 
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } /// DropOperands - Release the operands and set this node to have @@ -5378,9 +5375,10 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, + const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { + : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -5669,13 +5667,16 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; - case ISD::FPOWI: return "fpowi"; - case ISD::FPOW: return "fpow"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; // Binary operators case ISD::ADD: return "add"; @@ -5706,7 +5707,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::VSETCC: return "vsetcc"; case ISD::SELECT: return "select"; @@ -6260,23 +6263,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. 
- return std::max(Align, FIInfoAlign); - } return FIInfoAlign; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fbe601f..d323c16 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "isel" #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" -#include "FunctionLoweringInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -32,6 +31,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" @@ -70,113 +70,6 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// TLI - The TargetLowering object. - /// - const TargetLowering *TLI; - - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector<EVT, 4> ValueVTs; - - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector<EVT, 4> RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. 
-    ///
-    SmallVector<unsigned, 4> Regs;
-
-    RegsForValue() : TLI(0) {}
-
-    RegsForValue(const TargetLowering &tli,
-                 const SmallVector<unsigned, 4> &regs,
-                 EVT regvt, EVT valuevt)
-      : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
-    RegsForValue(const TargetLowering &tli,
-                 const SmallVector<unsigned, 4> &regs,
-                 const SmallVector<EVT, 4> &regvts,
-                 const SmallVector<EVT, 4> &valuevts)
-      : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
-    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
-                 unsigned Reg, const Type *Ty) : TLI(&tli) {
-      ComputeValueVTs(tli, Ty, ValueVTs);
-
-      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-        EVT ValueVT = ValueVTs[Value];
-        unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
-        EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
-        for (unsigned i = 0; i != NumRegs; ++i)
-          Regs.push_back(Reg + i);
-        RegVTs.push_back(RegisterVT);
-        Reg += NumRegs;
-      }
-    }
-
-    /// areValueTypesLegal - Return true if types of all the values are legal.
-    bool areValueTypesLegal() {
-      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-        EVT RegisterVT = RegVTs[Value];
-        if (!TLI->isTypeLegal(RegisterVT))
-          return false;
-      }
-      return true;
-    }
-
-
-    /// append - Add the specified values to this one.
-    void append(const RegsForValue &RHS) {
-      TLI = RHS.TLI;
-      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
-      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
-      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
-    }
-
-
-    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-    /// this value and returns the result as a ValueVTs value. This uses
-    /// Chain/Flag as the input and updates them for the output Chain/Flag.
-    /// If the Flag pointer is NULL, no flag is used.
-    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
-                            SDValue &Chain, SDValue *Flag) const;
-
-    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-    /// specified value into the registers specified by this object. This uses
-    /// Chain/Flag as the input and updates them for the output Chain/Flag.
-    /// If the Flag pointer is NULL, no flag is used.
-    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                       SDValue &Chain, SDValue *Flag) const;
-
-    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
-    /// operand list. This adds the code marker, matching input operand index
-    /// (if applicable), and includes the number of values added into it.
-    void AddInlineAsmOperands(unsigned Kind,
-                              bool HasMatching, unsigned MatchingIdx,
-                              SelectionDAG &DAG,
-                              std::vector<SDValue> &Ops) const;
-  };
-}
-
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
 /// larger then ValueVT then AssertOp can be used to specify whether the extra
@@ -528,6 +421,268 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
   }
 }
+namespace {
+  /// RegsForValue - This struct represents the registers (physical or virtual)
+  /// that a particular set of values is assigned, and the type information
+  /// about the value. The most common situation is to represent one value at a
+  /// time, but struct or array values are handled element-wise as multiple
+  /// values. The splitting of aggregates is performed recursively, so that we
+  /// never have aggregate-typed registers. The values at this point do not
+  /// necessarily have legal types, so each value may require one or more
+  /// registers of some legal type.
+  ///
+  struct RegsForValue {
+    /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+    ///
+    SmallVector<EVT, 4> ValueVTs;
+
+    /// RegVTs - The value types of the registers. This is the same size as
+    /// ValueVTs and it records, for each value, what the type of the assigned
+    /// register or registers are. (Individual values are never synthesized
+    /// from more than one type of register.)
+    ///
+    /// With virtual registers, the contents of RegVTs are redundant with TLI's
+    /// getRegisterType member function, however with physical registers
+    /// it is necessary to have a separate record of the types.
+    ///
+    SmallVector<EVT, 4> RegVTs;
+
+    /// Regs - This list holds the registers assigned to the values.
+    /// Each legal or promoted value requires one register, and each
+    /// expanded value requires multiple registers.
+    ///
+    SmallVector<unsigned, 4> Regs;
+
+    RegsForValue() {}
+
+    RegsForValue(const SmallVector<unsigned, 4> &regs,
+                 EVT regvt, EVT valuevt)
+      : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+    RegsForValue(const SmallVector<unsigned, 4> &regs,
+                 const SmallVector<EVT, 4> &regvts,
+                 const SmallVector<EVT, 4> &valuevts)
+      : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+
+    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+                 unsigned Reg, const Type *Ty) {
+      ComputeValueVTs(tli, Ty, ValueVTs);
+
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT ValueVT = ValueVTs[Value];
+        unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+        EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+        for (unsigned i = 0; i != NumRegs; ++i)
+          Regs.push_back(Reg + i);
+        RegVTs.push_back(RegisterVT);
+        Reg += NumRegs;
+      }
+    }
+
+    /// areValueTypesLegal - Return true if types of all the values are legal.
+    bool areValueTypesLegal(const TargetLowering &TLI) {
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT RegisterVT = RegVTs[Value];
+        if (!TLI.isTypeLegal(RegisterVT))
+          return false;
+      }
+      return true;
+    }
+
+    /// append - Add the specified values to this one.
+    void append(const RegsForValue &RHS) {
+      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    }
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+    /// this value and returns the result as a ValueVTs value. This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+                            DebugLoc dl,
+                            SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+    /// specified value into the registers specified by this object. This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                       SDValue &Chain, SDValue *Flag) const;
+
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list. This adds the code marker, matching input operand index
+    /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; + }; +} + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Assemble the legal parts into the final values. + SmallVector<SDValue, 4> Values(ValueVTs.size()); + SmallVector<SDValue, 8> Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. + if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. 
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                                 SDValue &Chain, SDValue *Flag) const {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Get the list of the values' legal parts.
+  unsigned NumRegs = Regs.size();
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+    EVT RegisterVT = RegVTs[Value];
+
+    getCopyToParts(DAG, dl,
+                   Val.getValue(Val.getResNo() + Value),
+                   &Parts[Part], NumParts, RegisterVT);
+    Part += NumParts;
+  }
+
+  // Copy the parts into the registers.
+  SmallVector<SDValue, 8> Chains(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i) {
+    SDValue Part;
+    if (Flag == 0) {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+    } else {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+      *Flag = Part.getValue(1);
+    }
+
+    Chains[i] = Part.getValue(0);
+  }
+
+  if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+    // a single scheduling unit. If we create a TokenFactor and return it as
+    // chain, then the TokenFactor is both a predecessor (operand) of the
+    // user as well as a successor (the TF operands are flagged to the user).
+    // c1, f1 = CopyToReg
+    // c2, f2 = CopyToReg
+    // c3     = TokenFactor c1, c2
+    // ...
+    //        = op c3, ..., f2
+    Chain = Chains[NumRegs-1];
+  else
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+                                        unsigned MatchingIdx,
+                                        SelectionDAG &DAG,
+                                        std::vector<SDValue> &Ops) const {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+  if (HasMatching)
+    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+  SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+  Ops.push_back(Res);
+
+  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+    EVT RegisterVT = RegVTs[Value];
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      assert(Reg < Regs.size() && "Mismatch in # registers expected");
+      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+    }
+  }
+}
 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
   AA = &aa;
@@ -543,6 +698,7 @@
 /// consumed.
 void SelectionDAGBuilder::clear() {
   NodeMap.clear();
+  UnusedArgNodeMap.clear();
   PendingLoads.clear();
   PendingExports.clear();
   CurDebugLoc = DebugLoc();
@@ -649,27 +805,63 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
   }
 }
+// getValue - Return an SDValue for the given Value.
 SDValue SelectionDAGBuilder::getValue(const Value *V) {
+  // If we already have an SDValue for this value, use it. It's important
+  // to do this first, so that we don't create a CopyFromReg if we already
+  // have a regular SDValue.
+  SDValue &N = NodeMap[V];
+  if (N.getNode()) return N;
+
+  // If there's a virtual register allocated and initialized for this
+  // value, use it.
+  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+  if (It != FuncInfo.ValueMap.end()) {
+    unsigned InReg = It->second;
+    RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+    SDValue Chain = DAG.getEntryNode();
+    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+  }
+
+  // Otherwise create a new SDValue and remember it.
+  SDValue Val = getValueImpl(V);
+  NodeMap[V] = Val;
+  return Val;
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+  // If we already have an SDValue for this value, use it.
   SDValue &N = NodeMap[V];
   if (N.getNode()) return N;
+  // Otherwise create a new SDValue and remember it.
+  SDValue Val = getValueImpl(V);
+  NodeMap[V] = Val;
+  return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
   if (const Constant *C = dyn_cast<Constant>(V)) {
     EVT VT = TLI.getValueType(V->getType(), true);
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
-      return N = DAG.getConstant(*CI, VT);
+      return DAG.getConstant(*CI, VT);
     if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-      return N = DAG.getGlobalAddress(GV, VT);
+      return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
     if (isa<ConstantPointerNull>(C))
-      return N = DAG.getConstant(0, TLI.getPointerTy());
+      return DAG.getConstant(0, TLI.getPointerTy());
     if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
-      return N = DAG.getConstantFP(*CFP, VT);
+      return DAG.getConstantFP(*CFP, VT);
     if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
-      return N = DAG.getUNDEF(VT);
+      return DAG.getUNDEF(VT);
     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
       visit(CE->getOpcode(), *CE);
@@ -757,82 +949,25 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
     return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
   }
-  unsigned InReg = FuncInfo.ValueMap[V];
-  assert(InReg && "Value not in map!");
-
-  RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
-  SDValue Chain = DAG.getEntryNode();
-  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
-}
-
-/// Get the EVTs and ArgFlags collections that represent the legalized return
-/// type of the given function.  This does not require a DAG or a return value,
-/// and is suitable for use before any DAGs for the function are constructed.
-static void getReturnInfo(const Type* ReturnType,
-                          Attributes attr, SmallVectorImpl<EVT> &OutVTs,
-                          SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
-                          const TargetLowering &TLI,
-                          SmallVectorImpl<uint64_t> *Offsets = 0) {
-  SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, ReturnType, ValueVTs);
-  unsigned NumValues = ValueVTs.size();
-  if (NumValues == 0) return;
-  unsigned Offset = 0;
-
-  for (unsigned j = 0, f = NumValues; j != f; ++j) {
-    EVT VT = ValueVTs[j];
-    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
-    if (attr & Attribute::SExt)
-      ExtendKind = ISD::SIGN_EXTEND;
-    else if (attr & Attribute::ZExt)
-      ExtendKind = ISD::ZERO_EXTEND;
-
-    // FIXME: C calling convention requires the return type to be promoted to
-    // at least 32-bit. But this is not necessary for non-C calling
-    // conventions. The frontend should mark functions whose return values
-    // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) - Flags.setInReg(); - - // Propagate extension type if any - if (attr & Attribute::SExt) - Flags.setSExt(); - else if (attr & Attribute::ZExt) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) { - OutVTs.push_back(PartVT); - OutFlags.push_back(Flags); - if (Offsets) - { - Offsets->push_back(Offset); - Offset += PartSize; - } - } + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); } + + llvm_unreachable("Can't get register for value!"); + return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + SmallVector<SDValue, 8> OutVals; - if (!FLI.CanLowerReturn) { - unsigned DemoteReg = FLI.DemoteRegister; + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. @@ -908,8 +1043,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + /*isfixed=*/true)); + OutVals.push_back(Parts[i]); + } } } } @@ -918,7 +1056,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, getCurDebugLoc(), DAG); + Outs, OutVals, getCurDebugLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1119,7 +1257,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ } void SelectionDAGBuilder::visitBr(const BranchInst &I) { - MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1269,18 +1407,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // If the branch was constant folded, fix up the CFG. - if (BrCond.getOpcode() == ISD::BR) { - SwitchBB->removeSuccessor(CB.FalseBB); - } else { - // Otherwise, go ahead and insert the false branch. 
- if (BrCond == getControlRoot()) - SwitchBB->removeSuccessor(CB.TrueBB); - - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); - } + // Insert the false branch. + if (CB.FalseBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1319,7 +1449,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // therefore require extension or truncating. SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1370,7 +1500,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, ShiftOp); @@ -1402,29 +1532,41 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); - - // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); - SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), - ISD::SETNE); + SDValue Cmp; + if (CountPopulation_64(B.Mask) == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(ShiftOp.getValueType()), + ShiftOp, + DAG.getConstant(CountTrailingZeros_64(B.Mask), + TLI.getPointerTy()), + ISD::SETEQ); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); + } SwitchBB->addSuccessor(B.TargetBB); SwitchBB->addSuccessor(NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), - AndCmp, DAG.getBasicBlock(B.TargetBB)); + Cmp, DAG.getBasicBlock(B.TargetBB)); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1441,7 +1583,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, } void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { - MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; // Retrieve successors. 
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1969,7 +2111,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, } void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; @@ -2035,7 +2177,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { - MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. SmallVector<BasicBlock*, 32> succs; @@ -2245,7 +2387,6 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2254,7 +2395,6 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2579,7 +2719,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; @@ -2643,12 +2783,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), - AllocSize, - DAG.getConstant(TySize, AllocSize.getValueType())); - EVT IntPtr = TLI.getPointerTy(); - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + if (AllocSize.getValueType() != IntPtr) + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize, + DAG.getConstant(TySize, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2804,8 +2945,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. 
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { - SDValue Op = getValue(I.getOperand(i)); + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + SDValue Op = getValue(I.getArgOperand(i)); assert(TLI.isTypeLegal(Op.getValueType()) && "Intrinsic uses a non-legal type?"); Ops.push_back(Op); @@ -2910,11 +3051,11 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -2923,8 +3064,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); @@ -2938,9 +3079,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3050,8 +3191,8 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3064,9 +3205,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. @@ -3160,8 +3301,8 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3174,9 +3315,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Get the exponent. 
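The rewrites from getOperand(i) to getArgOperand(i-1) running through this file and the surrounding hunks all encode one layout fact: operand 0 of a call is the callee, so argument i sits at operand i+1. A toy model of the two accessors (a hypothetical struct for illustration, not the real CallInst):

    #include <cassert>
    #include <vector>

    struct ToyCall {
      // Operand layout assumed here: [callee, arg0, arg1, ...].
      std::vector<int> Operands;
      int getOperand(unsigned i) const { return Operands[i]; }
      int getArgOperand(unsigned i) const { return Operands[i + 1]; }
      unsigned getNumArgOperands() const { return Operands.size() - 1; }
    };

    int main() {
      ToyCall C{{/*callee*/ 99, /*arg0*/ 7, /*arg1*/ 8}};
      assert(C.getOperand(1) == C.getArgOperand(0));  // same value, new index
      assert(C.getNumArgOperands() == 2);
      return 0;
    }

Going through the accessor insulates the lowering code from the call instruction's operand layout, which is the point of the migration.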
@@ -3269,8 +3410,8 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3283,9 +3424,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. @@ -3371,8 +3512,8 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG10, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3385,9 +3526,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); @@ -3485,8 +3626,8 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3497,12 +3638,12 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { void SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue result; - const Value *Val = I.getOperand(1); + const Value *Val = I.getArgOperand(0); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; if (getValue(Val).getValueType() == MVT::f32 && - getValue(I.getOperand(2)).getValueType() == MVT::f32 && + getValue(I.getArgOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { @@ -3513,7 +3654,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(2)); + SDValue Op = getValue(I.getArgOperand(1)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3618,9 +3759,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } else { // No special expansion. 
result = DAG.getNode(ISD::FPOW, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); } setValue(&I, result); @@ -3696,7 +3837,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + MachineBasicBlock *MBB = FuncInfo.MBB; if (MBB != &MF.front()) return false; @@ -3750,11 +3891,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: return "_setjmp"+!TLI.usesUnderscoreSetJmp(); @@ -3763,63 +3904,63 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace() + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace() + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::memset: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace() + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0)); + I.getArgOperand(0), 0)); return 0; } case Intrinsic::memmove: { // Assert for address < 256 since we support only user defined address // spaces. 
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace() + assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace() + cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); // If the source and destination are known to not be aliases, we can // lower memmove as memcpy. uint64_t Size = -1ULL; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) Size = C->getZExtValue(); - if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == AliasAnalysis::NoAlias) { DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getOperand(1), 0, I.getOperand(2), 0)); + false, I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::dbg_declare: { @@ -3908,7 +4049,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { bool createUndef = false; // FIXME : Why not use getValue() directly ? - SDValue &N = NodeMap[V]; + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) + // Check unused arguments map. + N = UnusedArgNodeMap[V]; if (N.getNode()) { if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), @@ -3956,7 +4100,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + assert(FuncInfo.MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; @@ -3968,7 +4112,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *CallMBB = FuncInfo.MBB; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (CallMBB->isLandingPad()) AddCatchInfo(I, &MMI, CallMBB); @@ -3978,13 +4122,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } // Insert the EHSELECTION instruction. 
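The memmove lowering above queries alias analysis and, on a NoAlias answer, emits the cheaper memcpy instead. The same idea at the C++ library level, as a hedged sketch in which the NoOverlap flag stands in for the AliasAnalysis query:

#include <cstddef>
#include <cstring>

// memcpy is only legal when the two ranges are disjoint, which is exactly
// what AliasAnalysis::NoAlias establishes for the intrinsic's operands.
void copyBytes(void *Dst, const void *Src, std::size_t N, bool NoOverlap) {
  if (NoOverlap)
    std::memcpy(Dst, Src, N);   // disjoint ranges: cheaper copy
  else
    std::memmove(Dst, Src, N);  // may overlap: must be order-safe
}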
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[2]; - Ops[0] = getValue(I.getOperand(1)); + Ops[0] = getValue(I.getArgOperand(0)); Ops[1] = getRoot(); SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); DAG.setRoot(Op.getValue(1)); @@ -3994,7 +4138,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. - GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); @@ -4007,15 +4151,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, MVT::Other, getControlRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); return 0; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return 0; case Intrinsic::eh_dwarf_cfa: { - EVT VT = getValue(I.getOperand(1)).getValueType(); - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -4031,7 +4174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); @@ -4040,13 +4183,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_setjmp: { setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } @@ -4072,34 +4215,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); - const Value *Op1 = I.getOperand(1); + const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Code); setValue(&I, Res); return 0; } case Intrinsic::sqrt: setValue(&I, DAG.getNode(ISD::FSQRT, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::powi: - setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)), - getValue(I.getOperand(2)), DAG)); + setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::sin: setValue(&I, DAG.getNode(ISD::FSIN, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + 
getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cos: setValue(&I, DAG.getNode(ISD::FCOS, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::log: visitLog(I); @@ -4121,14 +4264,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, - MVT::i16, getValue(I.getOperand(1)))); + MVT::i16, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, - MVT::f32, getValue(I.getOperand(1)))); + MVT::f32, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::pcmarker: { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); return 0; } @@ -4143,23 +4286,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cttz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); return 0; @@ -4173,7 +4316,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::stackrestore: { - Res = getValue(I.getOperand(1)); + Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); return 0; } @@ -4183,8 +4326,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(); - SDValue Src = getValue(I.getOperand(1)); // The guard's value. - AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI->setStackProtectorIndex(FI); @@ -4201,14 +4344,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. 
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2)); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); assert(CI && "Non-constant type in __builtin_object_size?"); - SDValue Arg = getValue(I.getOperand(0)); + SDValue Arg = getValue(I.getCalledValue()); EVT Ty = Arg.getValueType(); - if (CI->getZExtValue() == 0) + if (CI->isZero()) Res = DAG.getConstant(-1ULL, Ty); else Res = DAG.getConstant(0, Ty); @@ -4221,14 +4364,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::init_trampoline: { - const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts()); + const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); - Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); Res = DAG.getNode(ISD::TRAMPOLINE, dl, @@ -4241,8 +4384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - const Value *Alloca = I.getOperand(1); - const Constant *TypeMap = cast<Constant>(I.getOperand(2)); + const Value *Alloca = I.getArgOperand(0); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); @@ -4274,9 +4417,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); return 0; } @@ -4285,7 +4428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[6]; Ops[0] = getRoot(); for (int x = 1; x < 6; ++x) - Ops[x] = getValue(I.getOperand(x)); + Ops[x] = getValue(I.getArgOperand(x - 1)); DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); return 0; @@ -4294,12 +4437,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4353,14 +4496,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Args.reserve(CS.arg_size()); // Check whether the function can return without sret-demotion. 
- SmallVector<EVT, 4> OutVTs; - SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; + SmallVector<ISD::OutputArg, 4> Outs; SmallVector<uint64_t, 4> Offsets; - getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI, &Offsets); + GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + Outs, TLI, &Offsets); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), OutVTs, OutsFlags, DAG); + FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; @@ -4453,7 +4595,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; - unsigned NumValues = OutVTs.size(); + unsigned NumValues = Outs.size(); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(NumValues); @@ -4461,7 +4603,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, + SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); @@ -4580,16 +4722,16 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, /// lowered like a normal call. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) - if (I.getNumOperands() != 4) + if (I.getNumArgOperands() != 3) return false; - const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getOperand(3)->getType()->isIntegerTy() || + !I.getArgOperand(2)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); + const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 @@ -4656,11 +4798,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa<InlineAsm>(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); - if (II) { + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) @@ -4679,51 +4826,51 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { - if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.getType() == I.getOperand(2)->getType()) { - SDValue LHS = getValue(I.getOperand(1)); - SDValue RHS = getValue(I.getOperand(2)); + if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.getType() == I.getArgOperand(1)->getType()) { + SDValue LHS = getValue(I.getArgOperand(0)); + SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType()) { - SDValue Tmp = getValue(I.getOperand(1)); + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; @@ -4733,14 +4880,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - } else if (isa<InlineAsm>(I.getOperand(0))) { - visitInlineAsm(&I); - return; } - + SDValue Callee; if (!RenameFn) - Callee = getValue(I.getOperand(0)); + Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); @@ -4749,210 +4893,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. 
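Each libm special case above applies the same guard before folding the call into a single FP node: correct arity, a floating-point operand, a result type matching the operand type, and, for sin/cos/sqrt where errno matters, onlyReadsMemory(). A condensed sketch of that guard with the node choice elided (the helper name is illustrative):

#include "llvm/Instructions.h"
using namespace llvm;

// True when a declared call such as sinf(x) is safe to lower as one FP
// DAG node: unary, FP-typed, type-preserving, and free of side effects
// like setting errno.
static bool isFoldableUnaryFPCall(const CallInst &I) {
  return I.getNumArgOperands() == 1 &&
         I.getArgOperand(0)->getType()->isFloatingPointTy() &&
         I.getType() == I.getArgOperand(0)->getType() &&
         I.onlyReadsMemory();
}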
-SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Assemble the legal parts into the final values. - SmallVector<SDValue, 4> Values(ValueVTs.size()); - SmallVector<SDValue, 8> Parts; - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - // Copy the legal parts from the registers. - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - Parts.resize(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue P; - if (Flag == 0) { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); - } else { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); - *Flag = P.getValue(2); - } - - Chain = P.getValue(1); - - // If the source register was virtual and if we know something about it, - // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - if (FLI.LiveOutRegInfo.size() > SlotNo) { - FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; - - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. - bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } - - Parts[i] = P; - } - - Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); - Part += NumRegs; - Parts.clear(); - } - - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); -} - -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Get the list of the values's legal parts. 
- unsigned NumRegs = Regs.size(); - SmallVector<SDValue, 8> Parts(NumRegs); - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); - Part += NumParts; - } - - // Copy the parts into the registers. - SmallVector<SDValue, 8> Chains(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue Part; - if (Flag == 0) { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); - } else { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); - *Flag = Part.getValue(1); - } - - Chains[i] = Part.getValue(0); - } - - if (NumRegs == 1 || Flag) - // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is - // flagged to it. That is the CopyToReg nodes and the user are considered - // a single scheduling unit. If we create a TokenFactor and return it as - // chain, then the TokenFactor is both a predecessor (operand) of the - // user as well as a successor (the TF operands are flagged to the user). - // c1, f1 = CopyToReg - // c2, f2 = CopyToReg - // c3 = TokenFactor c1, c2 - // ... - // = op c3, ..., f2 - Chain = Chains[NumRegs-1]; - else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); -} - -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const { - unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); - if (HasMatching) - Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); - Ops.push_back(Res); - - for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); - EVT RegisterVT = RegVTs[Value]; - for (unsigned i = 0; i != NumRegs; ++i) { - assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); - } - } -} - -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. 
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - - namespace llvm { + /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : @@ -5041,8 +4983,56 @@ private: Regs.insert(*Aliases); } }; + } // end llvm namespace. +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + EVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + EVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). This is a slight code pessimization, but should still work. + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + if (*I == Reg) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } + } + return FoundRC; +} /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. 
We prefer to assign virtual registers, to allow the @@ -5154,7 +5144,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, } } - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5172,7 +5162,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (; NumRegs; --NumRegs) Regs.push_back(RegInfo.createVirtualRegister(RC)); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } @@ -5215,7 +5205,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (unsigned i = RegStart; i != RegEnd; ++i) Regs.push_back(RegClassRegs[i]); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), + OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), OpInfo.ConstraintVT); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5332,7 +5322,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. @@ -5406,6 +5396,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); + // Remember the AlignStack bit as operand 3. + AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, + MVT::i1)); + // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; @@ -5497,7 +5491,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } RegsForValue MatchedRegs; - MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); @@ -5535,7 +5528,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], - hasMemory, Ops, DAG); + Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5570,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal()) + !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5595,7 +5588,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Finish up input operands. Set the input chain and add the flag last. - AsmNodeOperands[0] = Chain; + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), @@ -5606,7 +5599,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this asm returns a register value, copy the result from that register // and set it as the value of the call. 
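The inline-asm changes above grow the fixed operand prefix of an INLINEASM node (chain, asm string, !srcloc metadata, and now an AlignStack flag) and replace the magic index 0 with InlineAsm::Op_InputChain. A sketch of why named slots beat bare indices; only Op_InputChain and the operand-3 AlignStack bit are confirmed by the patch text, the other values are assumed for illustration:

// Hypothetical mirror of the fixed INLINEASM operand layout.
enum InlineAsmOp {
  Op_InputChain = 0,   // incoming chain
  Op_AsmString  = 1,   // the asm text
  Op_MDNode     = 2,   // !srcloc metadata
  Op_AlignStack = 3    // new: does this asm need stack realignment?
};

// AsmNodeOperands[Op_InputChain] = Chain;  // self-documenting
// AsmNodeOperands[0] = Chain;              // what the old code relied on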
if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); // FIXME: Why don't we do this for inline asms with MRVs? @@ -5646,7 +5639,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; - SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -5672,14 +5665,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetData &TD = *TLI.getTargetData(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), - DAG.getSrcValue(I.getOperand(0))); + DAG.getSrcValue(I.getOperand(0)), + TD.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -5687,17 +5682,17 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - DAG.getSrcValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -5715,6 +5710,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DebugLoc dl) const { // Handle all of the outgoing arguments. SmallVector<ISD::OutputArg, 32> Outs; + SmallVector<SDValue, 32> OutVals; for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); @@ -5768,13 +5764,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + i < NumFixedArgs); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); Outs.push_back(MyFlags); + OutVals.push_back(Parts[j]); } } } @@ -5803,7 +5801,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SmallVector<SDValue, 4> InVals; Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. 
assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -5876,7 +5874,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { - SDValue Op = getValue(V); + SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); @@ -5894,21 +5892,16 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; - SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDB->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); SmallVector<ISD::InputArg, 16> Ins; // Check whether the function can return without sret-demotion. - SmallVector<EVT, 4> OutVTs; - SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; - getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI); - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - - FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), - OutVTs, OutsFlags, DAG); - if (!FLI.CanLowerReturn) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); @@ -6002,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Set up the argument values. unsigned i = 0; Idx = 1; - if (!FLI.CanLowerReturn) { + if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector<EVT, 1> ValueVTs; @@ -6016,7 +6009,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); - FLI.DemoteRegister = SRetReg; + FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); @@ -6032,6 +6025,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + + // If this argument is unused then remember its value. It is used to generate + // debugging information. 
+ if (I->use_empty() && NumValues) + SDB->setUnusedArgValue(I, InVals[i]); + for (unsigned Value = 0; Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); @@ -6112,17 +6111,20 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const Constant *C = dyn_cast<Constant>(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo.CreateRegForValue(C); + RegOut = FuncInfo.CreateRegs(C->getType()); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { - Reg = FuncInfo.ValueMap[PHIOp]; - if (Reg == 0) { + DenseMap<const Value *, unsigned>::iterator I = + FuncInfo.ValueMap.find(PHIOp); + if (I != FuncInfo.ValueMap.end()) + Reg = I->second; + else { assert(isa<AllocaInst>(PHIOp) && FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo.CreateRegForValue(PHIOp); + Reg = FuncInfo.CreateRegs(PHIOp->getType()); CopyValueToVirtualRegister(PHIOp, Reg); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 3fcd4b9..46733d6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -88,6 +88,10 @@ class SelectionDAGBuilder { DebugLoc CurDebugLoc; DenseMap<const Value*, SDValue> NodeMap; + + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used + /// to preserve debug information for incoming arguments. + DenseMap<const Value*, SDValue> UnusedArgNodeMap; public: /// PendingLoads - Loads are not emitted to the program immediately. We bunch @@ -342,6 +346,8 @@ public: void visit(unsigned Opcode, const User &I); SDValue getValue(const Value *V); + SDValue getNonRegisterValue(const Value *V); + SDValue getValueImpl(const Value *V); void setValue(const Value *V, SDValue NewN) { SDValue &N = NodeMap[V]; @@ -349,6 +355,12 @@ public: N = NewN; } + void setUnusedArgValue(const Value *V, SDValue NewN) { + SDValue &N = UnusedArgNodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 65b8d4f..08ba548 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "isel" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "FunctionLoweringInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" @@ -171,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm, *FuncInfo)), + CurDAG(new SelectionDAG(tm)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -244,7 +244,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF); - FuncInfo->set(Fn, *MF, EnableFastISel); + FuncInfo->set(Fn, *MF); SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -300,7 
+300,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); - if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) { + + // Operand 1 of an inline asm instruction indicates whether the asm + // needs stack or not. + if ((II->isInlineAsm() && II->getOperand(1).getImm()) || + (TID.isCall() && !TID.isReturn())) { MFI->setHasCalls(true); goto done; } @@ -312,6 +316,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there is a call to setjmp in the machine function. MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + // Replace forward-declared registers with the registers containing + // the desired value. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap<unsigned, unsigned>::iterator + I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + for (;;) { + DenseMap<unsigned, unsigned>::iterator J = + FuncInfo->RegFixups.find(To); + if (J == E) break; + To = J->second; + } + // Replace it. + MRI.replaceRegWith(From, To); + } + // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -319,10 +343,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -MachineBasicBlock * -SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, - const BasicBlock *LLVMBB, - BasicBlock::const_iterator Begin, +void +SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted @@ -337,7 +359,7 @@ SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, SDB->clear(); // Final step, emit the lowered DAG as machine code. - return CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } namespace { @@ -372,102 +394,6 @@ public: }; } -/// TrivialTruncElim - Eliminate some trivial nops that can result from -/// ShrinkDemandedOps: (trunc (ext n)) -> n. -static bool TrivialTruncElim(SDValue Op, - TargetLowering::TargetLoweringOpt &TLO) { - SDValue N0 = Op.getOperand(0); - EVT VT = Op.getValueType(); - if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND) && - N0.getOperand(0).getValueType() == VT) { - return TLO.CombineTo(Op, N0.getOperand(0)); - } - return false; -} - -/// ShrinkDemandedOps - A late transformation pass that shrink expressions -/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts -/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. -void SelectionDAGISel::ShrinkDemandedOps() { - SmallVector<SDNode*, 128> Worklist; - SmallPtrSet<SDNode*, 128> InWorklist; - - // Add all the dag nodes to the worklist. 
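The RegFixups loop added above must handle chains: an entry's To register may itself be scheduled for replacement, so the loop walks forward until it reaches a register with no pending fixup. The same chasing logic over a plain std::map, as a self-contained sketch with illustrative names:

#include <map>

// Resolve a register through its From -> To fixups to the ultimate target
// before rewriting, so a chain like 5 -> 7, 7 -> 9 ends with 5 mapped to 9.
unsigned resolveFixup(const std::map<unsigned, unsigned> &Fixups,
                      unsigned Reg) {
  std::map<unsigned, unsigned>::const_iterator J = Fixups.find(Reg);
  while (J != Fixups.end()) {
    Reg = J->second;        // follow the forwarded replacement
    J = Fixups.find(Reg);
  }
  return Reg;
}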
- Worklist.reserve(CurDAG->allnodes_size()); - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - Worklist.push_back(I); - InWorklist.insert(I); - } - - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); - while (!Worklist.empty()) { - SDNode *N = Worklist.pop_back_val(); - InWorklist.erase(N); - - if (N->use_empty() && N != CurDAG->getRoot().getNode()) { - // Deleting this node may make its operands dead, add them to the worklist - // if they aren't already there. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (InWorklist.insert(N->getOperand(i).getNode())) - Worklist.push_back(N->getOperand(i).getNode()); - - CurDAG->DeleteNode(N); - continue; - } - - // Run ShrinkDemandedOp on scalar binary operations. - if (N->getNumValues() != 1 || - !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger()) - continue; - - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Demanded = APInt::getAllOnesValue(BitWidth); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, - KnownZero, KnownOne, TLO) && - (N->getOpcode() != ISD::TRUNCATE || - !TrivialTruncElim(SDValue(N, 0), TLO))) - continue; - - // Revisit the node. - assert(!InWorklist.count(N) && "Already in worklist"); - Worklist.push_back(N); - InWorklist.insert(N); - - // Replace the old value with the new one. - DEBUG(errs() << "\nShrinkDemandedOps replacing "; - TLO.Old.getNode()->dump(CurDAG); - errs() << "\nWith: "; - TLO.New.getNode()->dump(CurDAG); - errs() << '\n'); - - if (InWorklist.insert(TLO.New.getNode())) - Worklist.push_back(TLO.New.getNode()); - - SDOPsWorkListRemover DeadNodes(Worklist, InWorklist); - CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); - - if (!TLO.Old.getNode()->use_empty()) continue; - - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); - i != e; ++i) { - SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); - if (OpNode->hasOneUse()) { - // Add OpNode to the end of the list to revisit. - DeadNodes.RemoveFromWorklist(OpNode); - Worklist.push_back(OpNode); - InWorklist.insert(OpNode); - } - } - - DeadNodes.RemoveFromWorklist(TLO.Old.getNode()); - CurDAG->DeleteNode(TLO.Old.getNode()); - } -} - void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet<SDNode*, 128> VisitedNodes; SmallVector<SDNode*, 128> Worklist; @@ -522,7 +448,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } while (!Worklist.empty()); } -MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { +void SelectionDAGISel::CodeGenAndEmitDAG() { std::string GroupName; if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; @@ -531,23 +457,19 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) BlockName = MF->getFunction()->getNameStr() + ":" + - BB->getBasicBlock()->getNameStr(); + FuncInfo->MBB->getBasicBlock()->getNameStr(); - DEBUG(dbgs() << "Initial selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. 
- if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 1", GroupName); - CurDAG->Combine(Unrestricted, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. @@ -555,44 +477,36 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { BlockName); bool Changed; - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization", GroupName); - Changed = CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize types", GroupName); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize types", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; + CurDAG->dump()); } - if (TimePassesIsEnabled) { - NamedRegionTimer T("Vector Legalization", GroupName); - Changed = CurDAG->LegalizeVectors(); - } else { + { + NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeVectors(); } if (Changed) { - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization 2", GroupName); - CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } @@ -600,95 +514,79 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { CurDAG->viewGraph("dag-combine-lv input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; + CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Legalization", GroupName); - CurDAG->Legalize(OptLevel); - } else { + { + NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); CurDAG->Legalize(OptLevel); } - DEBUG(dbgs() << "Legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. 
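The repeated rewrite above is one refactor applied many times: instead of duplicating the timed and untimed call in an if/else, the work runs once inside a scope whose NamedRegionTimer takes TimePassesIsEnabled as a constructor argument and becomes a no-op when timing is off. A minimal stand-in for that RAII shape; ScopedTimer is hypothetical, NamedRegionTimer is the real class:

#include <cstdio>
#include <ctime>

// No-op when Enabled is false, otherwise reports the scope's duration on
// destruction -- the property the patch relies on to collapse the if/else.
struct ScopedTimer {
  const char *Name;
  bool Enabled;
  std::clock_t Start;
  ScopedTimer(const char *N, bool E) : Name(N), Enabled(E), Start(0) {
    if (Enabled) Start = std::clock();
  }
  ~ScopedTimer() {
    if (Enabled)
      std::fprintf(stderr, "%s: %ld clocks\n", Name,
                   (long)(std::clock() - Start));
  }
};

void combineOnce(bool TimePassesIsEnabled) {
  ScopedTimer T("DAG Combining", TimePassesIsEnabled);
  // ... the single call to CurDAG->Combine(...) goes here ...
}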
- if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 2", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); - if (OptLevel != CodeGenOpt::None) { - ShrinkDemandedOps(); + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - } if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Selection", GroupName); - DoInstructionSelection(); - } else { + { + NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling", GroupName); - Scheduler->Run(CurDAG, BB, BB->end()); - } else { - Scheduler->Run(CurDAG, BB, BB->end()); + { + NamedRegionTimer T("Instruction Scheduling", GroupName, + TimePassesIsEnabled); + Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); } if (ViewSUnitDAGs) Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(); - } else { - BB = Scheduler->EmitSchedule(); + { + NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + + FuncInfo->MBB = Scheduler->EmitSchedule(); + FuncInfo->InsertPt = Scheduler->InsertPos; } // Free the scheduler state. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); - delete Scheduler; - } else { + { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, + TimePassesIsEnabled); delete Scheduler; } // Free the SelectionDAG state, now that we're finished with it. CurDAG->clear(); - - return BB; } void SelectionDAGISel::DoInstructionSelection() { @@ -750,21 +648,22 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. -void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { +void SelectionDAGISel::PrepareEHLandingPad() { // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF->getMMI().addLandingPad(BB); + MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); - BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); + BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) + .addSym(Label); // Mark exception register as live in. unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // Mark exception selector register as live in. 
Reg = TLI.getExceptionSelectorRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // FIXME: Hack around an exception handling flaw (PR1508): the personality // function and list of typeids logically belong to the invoke (or, if you @@ -777,7 +676,7 @@ void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { // in exceptions not being caught because no typeids are associated with // the invoke. This may not be the only way things can go wrong, but it // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = BB->getBasicBlock(); + const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock(); const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); if (Br && Br->isUnconditional()) { // Critical edge? @@ -796,83 +695,100 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap, - FuncInfo->StaticAllocaMap, - FuncInfo->PHINodesToUpdate -#ifndef NDEBUG - , FuncInfo->CatchInfoLost -#endif - ); + FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { const BasicBlock *LLVMBB = &*I; - MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); BasicBlock::const_iterator const End = LLVMBB->end(); - BasicBlock::const_iterator BI = Begin; + BasicBlock::const_iterator BI = End; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + // Setup an EH landing-pad block. + if (FuncInfo->MBB->isLandingPad()) + PrepareEHLandingPad(); + // Lower any arguments needed in this block if this is the entry block. if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - // Setup an EH landing-pad block. - if (BB->isLandingPad()) - PrepareEHLandingPad(BB); - // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { + FastIS->startNewBlock(); + // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { CurDAG->setRoot(SDB->getControlRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(0); } - FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. - for (; BI != End; ++BI) { + for (; BI != Begin; --BI) { + const Instruction *Inst = llvm::prior(BI); + + // If we no longer require this instruction, skip it. + if (!Inst->mayWriteToMemory() && + !isa<TerminatorInst>(Inst) && + !isa<DbgInfoIntrinsic>(Inst) && + !FuncInfo->isExportedInst(Inst)) + continue; + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(BI)) + if (FastIS->SelectInstruction(Inst)) continue; // Then handle certain instructions as single-LLVM-Instruction blocks. 
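
The FastISel loop above now runs bottom-up: BI starts at End, each step inspects llvm::prior(BI), and instructions that are no longer required (no memory writes, not terminators or debug intrinsics, not exported) are skipped outright. The shape of that reverse scan over a half-open range, with a hypothetical isNeeded predicate standing in for the real checks:

#include <vector>

struct Inst { bool HasSideEffects; bool IsUsed; };

// Stand-in for the mayWriteToMemory/terminator/exported-value tests.
static bool isNeeded(const Inst &I) { return I.HasSideEffects || I.IsUsed; }

static void scanBottomUp(std::vector<Inst> &Block, void (*Select)(Inst &)) {
  for (std::vector<Inst>::iterator BI = Block.end(); BI != Block.begin();) {
    --BI;               // look at the instruction just above the cursor
    if (!isNeeded(*BI))
      continue;         // dead by the time we reach it: skip
    Select(*BI);
  }
}

One motivation for the bottom-up order is that an instruction's users are visited before the instruction itself, so results nobody demanded can be recognized and skipped, exactly as the loop above does.
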
- if (isa<CallInst>(BI)) { + if (isa<CallInst>(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; - BI->dump(); + Inst->dump(); } - if (!BI->getType()->isVoidTy() && !BI->use_empty()) { - unsigned &R = FuncInfo->ValueMap[BI]; + if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) - R = FuncInfo->CreateRegForValue(BI); + R = FuncInfo->CreateRegs(Inst->getType()); } bool HadTailCall = false; - BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall); + SelectBasicBlock(Inst, BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { - BI = End; + --BI; break; } - // If the instruction was codegen'd with multiple blocks, - // inform the FastISel object where to resume inserting. - FastIS->setCurrentBlock(BB); continue; } // Otherwise, give up on FastISel for the rest of the block. // For now, be a little lenient about non-branch terminators. - if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; - BI->dump(); + Inst->dump(); } if (EnableFastISelAbort) // The "fast" selector couldn't handle something and bailed. @@ -881,17 +797,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } break; } + + FastIS->recomputeInsertPt(); } // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. - if (BI != End) { - bool HadTailCall; - BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall); - } + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); - FinishBasicBlock(BB); + FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } @@ -899,11 +815,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } void -SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { +SelectionDAGISel::FinishBasicBlock() { DEBUG(dbgs() << "Total amount of phi nodes to update: " - << FuncInfo->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); @@ -917,11 +833,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (!BB->isSuccessor(PHI->getParent())) + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) continue; PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } return; } @@ -930,33 +846,35 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Parent; + FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i], BB); + 
SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Update PHI Nodes @@ -1001,22 +919,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].second.MBB; + FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); @@ -1034,11 +954,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here - if (BB->isSuccessor(PHIBB)) { + if (FuncInfo->MBB->isSuccessor(PHIBB)) { PHI->addOperand (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } } @@ -1050,10 +970,10 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (BB->isSuccessor(PHI->getParent())) { + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) { PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } @@ -1061,7 +981,8 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; + MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. SmallVector<MachineBasicBlock *, 2> Succs; @@ -1071,21 +992,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Emit the code. 
Note that this could result in ThisBB being split, so // we need to check for updates. - SDB->visitSwitchCase(SDB->SwitchCases[i], BB); + SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - ThisBB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - BB = Succs[i]; - // BB may have been removed from the CFG if a branch was constant folded. - if (ThisBB->isSuccessor(BB)) { - for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->isPHI(); + FuncInfo->MBB = Succs[i]; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // FuncInfo->MBB may have been removed from the CFG if a branch was + // constant folded. + if (ThisBB->isSuccessor(FuncInfo->MBB)) { + for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin(); + Phi != FuncInfo->MBB->end() && Phi->isPHI(); ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { @@ -1205,6 +1129,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc + Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) @@ -1701,7 +1626,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, SDValue(Res, ResNumResults-1)); if ((EmitNodeInfo & OPFL_FlagOutput) != 0) - --ResNumResults; + --ResNumResults; // Move the chain reference if needed. 
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3786bd1..6cae804 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -278,7 +278,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
 FlaggedNodes.push_back(N);
 while (!FlaggedNodes.empty()) {
 O << DOTGraphTraits<SelectionDAG*>
- ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
+ ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
 FlaggedNodes.pop_back();
 if (!FlaggedNodes.empty())
 O << "\n ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 44a80d3..4f38669 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -261,6 +262,38 @@ static void InitLibcallNames(const char **Names) {
 Names[RTLIB::MEMMOVE] = "memmove";
 Names[RTLIB::MEMSET] = "memset";
 Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
}
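
The new RTLIB entries follow the GCC convention for __sync_* builtins: each operation gets one symbol per access width, suffixed with the size in bytes (1, 2, 4, 8). A small illustration of deriving such names instead of writing them out by hand; the op list and helper here are illustrative, not LLVM API:

#include <cstdio>
#include <string>

static const char *const SyncOps[] = {
  "val_compare_and_swap", "lock_test_and_set", "fetch_and_add",
  "fetch_and_sub", "fetch_and_and", "fetch_and_or",
  "fetch_and_xor", "fetch_and_nand",
};

// __sync_<op>_<size>, where <size> is the operand width in bytes.
static std::string syncName(const char *Op, unsigned Bytes) {
  return "__sync_" + std::string(Op) + "_" + std::to_string(Bytes);
}

int main() {
  for (unsigned i = 0; i != sizeof(SyncOps) / sizeof(SyncOps[0]); ++i)
    for (unsigned Bytes = 1; Bytes <= 8; Bytes *= 2)
      std::printf("%s\n", syncName(SyncOps[i], Bytes).c_str());
  return 0;
}

Generating the names mechanically avoids the one-character slips that hand-written 32-entry string tables invite.
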
/// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -546,9 +579,9 @@ TargetLowering::TargetLowering(const TargetMachine &tm, SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; - IfCvtBlockSizeLimit = 2; - IfCvtDupBlockSizeLimit = 0; PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -578,9 +611,9 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, - unsigned &NumIntermediates, - EVT &RegisterVT, - TargetLowering* TLI) { + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering *TLI) { // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); @@ -610,16 +643,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; - if (EVT(DestVT).bitsLT(NewVT)) { - // Value is expanded, e.g. i64 -> i16. + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - } else { - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; - } - return 1; + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; } /// computeRegisterProperties - Once all of the register classes are added, @@ -705,39 +734,39 @@ void TargetLowering::computeRegisterProperties() { for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; - if (!isTypeLegal(VT)) { - MVT IntermediateVT; - EVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - // Determine if there is a legal wider type. - bool IsLegalWiderType = false; - EVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && NElts != 1) { - TransformToType[i] = SVT; - ValueTypeActions.setTypeAction(VT, Promote); - IsLegalWiderType = true; - break; - } + if (isTypeLegal(VT)) continue; + + MVT IntermediateVT; + EVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && NElts != 1) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; } - if (!IsLegalWiderType) { - EVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. 
The default action is to split. - TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); - } + } + if (!IsLegalWiderType) { + EVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); } } } @@ -811,6 +840,65 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, return 1; } +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI, + SmallVectorImpl<uint64_t> *Offsets) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + unsigned Offset = 0; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( + PartVT.getTypeForEVT(ReturnType->getContext())); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); + if (Offsets) { + Offsets->push_back(Offset); + Offset += PartSize; + } + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. @@ -1042,7 +1130,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -1076,7 +1164,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. 
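
SimplifyDemandedBits asks the same question at each bitwise operator: if the caller only demands the low bits, can the operation be evaluated in a narrower type? (TLO.ShrinkDemandedOp, consulted in the hunks just below, performs the rewrite.) The underlying transformation, sketched on plain integers:

#include <cstdint>

// If only the low 16 bits of a 32-bit AND are demanded, the same demanded
// bits can be produced by a 16-bit AND that is then widened again; the
// high half of the result was never going to be looked at.
uint32_t andDemandedLow16(uint32_t A, uint32_t B) {
  uint16_t Narrow = (uint16_t)(A & B);
  return (uint32_t)Narrow;
}
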
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1101,7 +1189,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1498,13 +1586,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::AssertZext: { - EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - APInt InMask = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); - if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, + // Demand all the bits of the input that are demanded in the output. + // The low bits are obvious; the high bits are demanded because we're + // asserting that they're zero here. + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); KnownZero |= ~InMask & NewMask; break; } @@ -1544,7 +1636,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH @@ -2346,7 +2438,6 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ /// vector. If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { switch (ConstraintLetter) { @@ -2384,7 +2475,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (ConstraintLetter != 'n') { int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + C->getDebugLoc(), Op.getValueType(), Offs)); return; } @@ -2507,18 +2599,18 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { /// 'm' over 'r', for example. /// static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, - bool hasMemory, const TargetLowering &TLI, + const TargetLowering &TLI, SDValue Op, SelectionDAG *DAG) { assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); unsigned BestIdx = 0; TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; int BestGenerality = -1; - + // Loop over the options, keeping track of the most general one. for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - + // If this is an 'other' constraint, see if the operand is valid for it. // For example, on X86 we might have an 'rI' constraint. 
If the operand // is an integer in the range [0..31] we want to use I (saving a load @@ -2527,7 +2619,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory, + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], ResultOps, *DAG); if (!ResultOps.empty()) { BestType = CType; @@ -2536,6 +2628,11 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, } } + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { @@ -2554,7 +2651,6 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, /// OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, - bool hasMemory, SelectionDAG *DAG) const { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); @@ -2563,7 +2659,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, OpInfo.ConstraintCode = OpInfo.Codes[0]; OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); } else { - ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG); + ChooseConstraint(OpInfo, *this, Op, DAG); } // 'X' matches anything. diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 5240bef..6ab0cb0 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/IRBuilder.h" using namespace llvm; @@ -158,7 +159,8 @@ namespace { // Create a new invoke instruction. 
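
The ShadowStackGC hunk below replaces raw operand indexing on a call with CallSite's argument iterators. In the IR of this period a call's operand list includes the callee as well as the arguments, so argument i is not operand i; the getArgOperand/arg_begin accessors hide that offset, as the getOperand(i+1) to getArgOperand(i) rewrites throughout this change show. A toy version of the distinction:

#include <vector>

// Toy call node: operand 0 is the callee, arguments follow it. Indexing
// raw operands off by one is exactly the bug class the accessors rule out.
struct Call {
  std::vector<int> Operands; // Operands[0] = callee, Operands[1..] = args

  int getArgOperand(unsigned i) const { return Operands[i + 1]; }
  std::vector<int>::const_iterator arg_begin() const {
    return Operands.begin() + 1;
  }
  std::vector<int>::const_iterator arg_end() const { return Operands.end(); }
};
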
Args.clear(); - Args.append(CI->op_begin() + 1, CI->op_end()); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, @@ -194,7 +196,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { unsigned NumMeta = 0; SmallVector<Constant*,16> Metadata; for (unsigned I = 0; I != Roots.size(); ++I) { - Constant *C = cast<Constant>(Roots[I].first->getOperand(2)); + Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); if (!C->isNullValue()) NumMeta = I + 1; Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); @@ -322,16 +324,16 @@ void ShadowStackGC::CollectRoots(Function &F) { assert(Roots.empty() && "Not cleaned up?"); - SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots; + SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) if (Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::gcroot) { - std::pair<CallInst*,AllocaInst*> Pair = std::make_pair( - CI, cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts())); - if (IsNullValue(CI->getOperand(2))) + std::pair<CallInst*, AllocaInst*> Pair = std::make_pair( + CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) Roots.push_back(Pair); else MetaRoots.push_back(Pair); diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h deleted file mode 100644 index f69feaf..0000000 --- a/lib/CodeGen/SimpleHazardRecognizer.h +++ /dev/null @@ -1,89 +0,0 @@ -//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SimpleHazardRecognizer class, which -// implements hazard-avoidance heuristics for scheduling, based on the -// scheduling itineraries specified for the target. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H -#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" - -namespace llvm { - /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses - /// a coarse classification and attempts to avoid that instructions of - /// a given class aren't grouped too densely together. - class SimpleHazardRecognizer : public ScheduleHazardRecognizer { - /// Class - A simple classification for SUnits. - enum Class { - Other, Load, Store - }; - - /// Window - The Class values of the most recently issued - /// instructions. - Class Window[8]; - - /// getClass - Classify the given SUnit. - Class getClass(const SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayLoad()) - return Load; - if (TID.mayStore()) - return Store; - return Other; - } - - /// Step - Rotate the existing entries in Window and insert the - /// given class value in position as the most recent. 
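
The header being deleted here implemented hazard avoidance with a fixed window of the eight most recently issued instruction classes: Step (below) rotates the window, and getHazardType scores how densely the candidate's class already occupies it. A self-contained sketch of that scheme:

#include <algorithm>
#include <cstddef>

enum Class { Other, Load, Store };

class WindowHazard {
  static const std::size_t N = 8;
  Class Window[N]; // Window[N-1] is the most recently issued class
public:
  WindowHazard() { std::fill(Window, Window + N, Other); }

  void step(Class C) { // rotate left and insert the newest at the end
    std::copy(Window + 1, Window + N, Window);
    Window[N - 1] = C;
  }

  // More recent slots weigh more; report a hazard once the candidate's
  // class is packed too densely into the window.
  bool isHazard(Class C) const {
    if (C == Other)
      return false;
    unsigned Score = 0;
    for (std::size_t i = 0; i != N; ++i)
      if (Window[i] == C)
        Score += i + 1;
    return Score > N * 2;
  }
};
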
- void Step(Class C) { - std::copy(Window+1, array_endof(Window), Window); - Window[array_lengthof(Window)-1] = C; - } - - public: - SimpleHazardRecognizer() : Window() { - Reset(); - } - - virtual HazardType getHazardType(SUnit *SU) { - Class C = getClass(SU); - if (C == Other) - return NoHazard; - unsigned Score = 0; - for (unsigned i = 0; i != array_lengthof(Window); ++i) - if (Window[i] == C) - Score += i + 1; - if (Score > array_lengthof(Window) * 2) - return Hazard; - return NoHazard; - } - - virtual void Reset() { - for (unsigned i = 0; i != array_lengthof(Window); ++i) - Window[i] = Other; - } - - virtual void EmitInstruction(SUnit *SU) { - Step(getClass(SU)); - } - - virtual void AdvanceCycle() { - Step(Other); - } - }; -} - -#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index ed3c243..e69d3e4 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -99,15 +99,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This returns true if an interval was modified. /// -bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, - LiveInterval &IntB, +bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - assert(BLR != IntB.end() && "Live range not found!"); + if (BLR == IntB.end()) return false; VNInfo *BValNo = BLR->valno; // Get the location that B is defined at. Two options: either this value has @@ -119,7 +127,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // AValNo is the value number in A that defines the copy, A3 in the example. SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - assert(ALR != IntA.end() && "Live range not found!"); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; VNInfo *AValNo = ALR->valno; // If it's re-defined by an early clobber somewhere in the live range, then // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. @@ -145,26 +154,21 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. - unsigned SrcReg = li_->getVNInfoSourceReg(AValNo); - if (!SrcReg) return false; // Not defined by a copy. - - // If the value number is not defined by a copy instruction, ignore it. - - // If the source register comes from an interval other than IntB, we can't - // handle this. - if (SrcReg != IntB.reg) return false; + if (!CP.isCoalescable(AValNo->getCopy())) + return false; // Get the LiveRange in IntB that this value number starts with. 
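
AdjustCopiesBackFrom now takes a CoalescerPair instead of two intervals, and the CP.isFlipped() selects above recover which interval plays the "A" and which the "B" role regardless of the direction the original copy was written in. A toy analogue of that normalization (the preference rule here is an assumption for illustration, not CoalescerPair's actual policy):

// DstReg is the register intended to survive the join; Flipped records
// that the copy instruction actually wrote in the other direction.
struct RegPair {
  unsigned SrcReg, DstReg;
  bool Flipped;

  unsigned intervalA() const { return Flipped ? DstReg : SrcReg; }
  unsigned intervalB() const { return Flipped ? SrcReg : DstReg; }
};

static RegPair normalize(unsigned CopySrc, unsigned CopyDst,
                         bool SrcIsPhys, bool DstIsPhys) {
  // Illustrative rule: keep a physical register on the Dst side.
  if (SrcIsPhys && !DstIsPhys)
    return RegPair{CopyDst, CopySrc, /*Flipped=*/true};
  return RegPair{CopySrc, CopyDst, /*Flipped=*/false};
}
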
LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - assert(ValLR != IntB.end() && "Live range not found!"); + if (ValLR == IntB.end()) + return false; // Make sure that the end of the live range is inside the same block as // CopyMI. MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || - ValLREndInst->getParent() != CopyMI->getParent()) return false; + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; // Okay, we now know that ValLR ends in the same block that the CopyMI // live-range starts. If there are no intervening live ranges between them in @@ -207,6 +211,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // physreg has sub-registers, update their live intervals as well. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; LiveInterval &SRLI = li_->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, SRLI.getNextValue(FillerStart, 0, true, @@ -216,7 +222,6 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // Okay, merge "B1" into the same value number as "B0". if (BValNo != ValLR->valno) { - IntB.addKills(ValLR->valno, BValNo->kills); IntB.MergeValueNumberInto(BValNo, ValLR->valno); } DEBUG({ @@ -230,13 +235,12 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { ValLREndInst->getOperand(UIdx).setIsKill(false); - ValLR->valno->removeKill(FillerStart); } // If the copy instruction was killing the destination register before the // merge, find the last use and trim the live range. That will also add the // isKill marker. - if (ALR->valno->isKill(CopyIdx)) + if (ALR->end == CopyIdx) TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); ++numExtends; @@ -304,23 +308,31 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { /// /// This returns true if an interval was modified. /// -bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, - LiveInterval &IntB, +bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI) { - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - // FIXME: For now, only eliminate the copy by commuting its def when the // source register is a virtual register. We want to guard against cases // where the copy is a back edge copy and commuting the def lengthen the // live interval of the source register to the entire loop. - if (TargetRegisterInfo::isPhysicalRegister(IntA.reg)) + if (CP.isPhys() && CP.isFlipped()) + return false; + + // Bail if there is no dst interval. + if (!li_->hasInterval(CP.getDstReg())) return false; + SlotIndex CopyIdx = + li_->getInstructionIndex(CopyMI).getDefIndex(); + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - assert(BLR != IntB.end() && "Live range not found!"); + if (BLR == IntB.end()) return false; VNInfo *BValNo = BLR->valno; // Get the location that B is defined at. 
Two options: either this value has @@ -342,6 +354,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, AValNo->isUnused() || AValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); + if (!DefMI) + return false; const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isCommutable()) return false; @@ -380,7 +394,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // clobbers from the superreg. if (BHasSubRegs) for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + if (li_->hasInterval(*SR) && + HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) return false; // If some of the uses of IntA.reg is already coalesced away, return false. @@ -413,7 +428,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector<VNInfo*, 4> BDeadValNos; - VNInfo::KillSet BKills; std::map<SlotIndex, SlotIndex> BExtend; // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. @@ -424,8 +438,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // C = A<kill> // ... // = B - // - // then do not add kills of A to the newly created B interval. bool Extended = BLR->end > ALR->end && ALR->end != ALR->start; if (Extended) BExtend[ALR->end] = BLR->end; @@ -448,34 +460,38 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; - UseMO.setReg(NewReg); + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *tri_); + else + UseMO.setReg(NewReg); if (UseMI == CopyMI) continue; if (UseMO.isKill()) { if (Extended) UseMO.setIsKill(false); - else - BKills.push_back(UseIdx.getDefIndex()); } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (UseMI->isCopy()) { + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ + if (DstReg != IntB.reg || DstSubIdx) + continue; + } else continue; - if (DstReg == IntB.reg && DstSubIdx == 0) { - // This copy will become a noop. If it's defining a new val#, - // remove that val# as well. However this live range is being - // extended to the end of the existing live range defined by the copy. - SlotIndex DefIdx = UseIdx.getDefIndex(); - const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); - BHasPHIKill |= DLR->valno->hasPHIKill(); - assert(DLR->valno->def == DefIdx); - BDeadValNos.push_back(DLR->valno); - BExtend[DLR->start] = DLR->end; - JoinedCopies.insert(UseMI); - // If this is a kill but it's going to be removed, the last use - // of the same val# is the new kill. - if (UseMO.isKill()) - BKills.pop_back(); - } + // This copy will become a noop. If it's defining a new val#, + // remove that val# as well. However this live range is being + // extended to the end of the existing live range defined by the copy. 
+ SlotIndex DefIdx = UseIdx.getDefIndex(); + const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); + if (!DLR) + continue; + BHasPHIKill |= DLR->valno->hasPHIKill(); + assert(DLR->valno->def == DefIdx); + BDeadValNos.push_back(DLR->valno); + BExtend[DLR->start] = DLR->end; + JoinedCopies.insert(UseMI); } // We need to insert a new liverange: [ALR.start, LastUse). It may be we can @@ -490,24 +506,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, VNInfo *DeadVNI = BDeadValNos[i]; if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; LiveInterval &SRLI = li_->getInterval(*SR); - const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def); - SRLI.removeValNo(SRLR->valno); + if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def)) + SRLI.removeValNo(SRLR->valno); } } IntB.removeValNo(BDeadValNos[i]); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition - // is updated. Kills are also updated. + // is updated. VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; ValNo->setCopy(0); - for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) { - if (ValNo->kills[j] != BLR->end) - BKills.push_back(ValNo->kills[j]); - } - ValNo->kills.clear(); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -517,18 +530,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (EI != BExtend.end()) End = EI->second; IntB.addRange(LiveRange(AI->start, End, ValNo)); - - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (BHasSubRegs) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.MergeInClobberRange(*li_, AI->start, End, - li_->getVNInfoAllocator()); - } - } } - IntB.addKills(ValNo, BKills); ValNo->setHasPHIKill(BHasPHIKill); DEBUG({ @@ -621,7 +623,11 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, // of last use. LastUse->setIsKill(); removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_); - LR->valno->addKill(LastUseIdx.getDefIndex()); + if (LastUseMI->isCopy()) { + MachineOperand &DefMO = LastUseMI->getOperand(0); + if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) + DefMO.setIsDead(); + } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && DstReg == li.reg && DstSubIdx == 0) { @@ -663,6 +669,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, ValNo->isUnused() || ValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + assert(DefMI && "Defining instruction disappeared"); const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) return false; @@ -701,33 +708,20 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - SlotIndex DefIdx = CopyIdx.getDefIndex(); - const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx); - DLR->valno->setCopy(0); - // Don't forget to update sub-register intervals. 
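
Several hunks in this file add the same guard before touching a physical register's sub-register or alias intervals: check li_->hasInterval(*SR) first, because after physreg coalescing not every alias still owns a live interval. The shape of that defensive walk, with toy stand-ins for the register-info queries:

#include <map>

struct LiveInterval { int dummy; };

static std::map<unsigned, LiveInterval> Intervals; // only some regs present

static const unsigned *subRegsOf(unsigned) {
  static const unsigned None[] = { 0 }; // toy: empty null-terminated list
  return None;
}

static void updateSubRegIntervals(unsigned PhysReg) {
  for (const unsigned *SR = subRegsOf(PhysReg); *SR; ++SR) {
    std::map<unsigned, LiveInterval>::iterator It = Intervals.find(*SR);
    if (It == Intervals.end())
      continue; // no interval for this sub-register: nothing to update
    // ... update It->second here ...
  }
}
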
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - const LiveRange *DLR = - li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - } - } + RemoveCopyFlag(DstReg, CopyMI); // If copy kills the source register, find the last use and propagate // kill. bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (SrcLR->valno->isKill(DefIdx)) + if (SrcLR->end == CopyIdx.getDefIndex()) if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { checkForDeadDef = true; } MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { @@ -747,24 +741,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, MachineOperand &MO = CopyMI->getOperand(i); if (MO.isReg() && MO.isImplicit()) NewMI->addOperand(MO); - if (MO.isDef() && li_->hasInterval(MO.getReg())) { - unsigned Reg = MO.getReg(); - const LiveRange *DLR = - li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - // Handle subregs as well - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - const LiveRange *DLR = - li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - } - } - } + if (MO.isDef()) + RemoveCopyFlag(MO.getReg(), CopyMI); } TransferImplicitOps(CopyMI, NewMI); @@ -783,84 +761,72 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, /// being updated is not zero, make sure to set it to the correct physical /// subregister. void -SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, - unsigned SubIdx) { - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (DstIsPhys && SubIdx) { - // Figure out the real physical register we are updating with. - DstReg = tri_->getSubReg(DstReg, SubIdx); - SubIdx = 0; - } - - // Copy the register use-list before traversing it. We may be adding operands - // and invalidating pointers. - SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - - for (unsigned N=0; N != reglist.size(); ++N) { - MachineInstr *UseMI = reglist[N].first; - MachineOperand &O = UseMI->getOperand(reglist[N].second); - unsigned OldSubIdx = O.getSubReg(); +SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. 
if (DstIsPhys) { - unsigned UseDstReg = DstReg; - if (OldSubIdx) - UseDstReg = tri_->getSubReg(DstReg, OldSubIdx); - unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx) && - CopySrcSubIdx == 0 && - CopyDstSubIdx == 0 && - CopySrcReg != CopyDstReg && - CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { - // If the use is a copy and it won't be coalesced away, and its source - // is defined by a trivial computation, try to rematerialize it instead. - if (!JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, - CopyDstSubIdx, UseMI)) - continue; - } + CopySrcSubIdx == 0 && CopyDstSubIdx == 0 && + CopySrcReg != CopyDstReg && CopySrcReg == SrcReg && + CopyDstReg != DstReg && !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0, + UseMI)) + continue; - O.setReg(UseDstReg); - O.setSubReg(0); - if (OldSubIdx) { - // Def and kill of subregister of a virtual register actually defs and - // kills the whole register. Add imp-defs and imp-kills as needed. - if (O.isDef()) { - if(O.isDead()) - UseMI->addRegisterDead(DstReg, tri_, true); - else - UseMI->addRegisterDefined(DstReg, tri_); - } else if (!O.isUndef() && - (O.isKill() || - UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) - UseMI->addRegisterKilled(DstReg, tri_, true); - } + if (UseMI->isCopy() && + !UseMI->getOperand(1).getSubReg() && + !UseMI->getOperand(0).getSubReg() && + UseMI->getOperand(1).getReg() == SrcReg && + UseMI->getOperand(0).getReg() != SrcReg && + UseMI->getOperand(0).getReg() != DstReg && + !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), + UseMI->getOperand(0).getReg(), 0, UseMI)) + continue; + } - DEBUG({ - dbgs() << "\t\tupdated: "; - if (!UseMI->isDebugValue()) - dbgs() << li_->getInstructionIndex(UseMI) << "\t"; - dbgs() << *UseMI; - }); - continue; + SmallVector<unsigned,8> Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + bool Kills = false, Deads = false; + + // Replace SrcReg with DstReg in all UseMI operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + Kills |= MO.isKill(); + Deads |= MO.isDead(); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *tri_); + else + MO.substVirtReg(DstReg, SubIdx, *tri_); } - // Sub-register indexes goes from small to large. e.g. - // RAX: 1 -> AL, 2 -> AX, 3 -> EAX - // EAX: 1 -> AL, 2 -> AX - // So RAX's sub-register 2 is AX, RAX's sub-regsiter 3 is EAX, whose - // sub-register 2 is also AX. - // - // FIXME: Properly compose subreg indices for all targets. - // - if (SubIdx && OldSubIdx && SubIdx != OldSubIdx) - ; - else if (SubIdx) - O.setSubReg(SubIdx); - O.setReg(DstReg); + // This instruction is a copy that will be removed. + if (JoinedCopies.count(UseMI)) + continue; + + if (SubIdx) { + // If UseMI was a simple SrcReg def, make sure we didn't turn it into a + // read-modify-write of DstReg. + if (Deads) + UseMI->addRegisterDead(DstReg, tri_); + else if (!Reads && Writes) + UseMI->addRegisterDefined(DstReg, tri_); + + // Kill flags apply to the whole physical register. 
+ if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } DEBUG({ dbgs() << "\t\tupdated: "; @@ -869,15 +835,15 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, dbgs() << *UseMI; }); + // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. - if (JoinedCopies.count(UseMI)) + const TargetInstrDesc &TID = UseMI->getDesc(); + if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2) continue; - const TargetInstrDesc &TID = UseMI->getDesc(); unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; - if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 && - tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, + if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx) && CopySrcReg != CopyDstReg && (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || @@ -945,6 +911,27 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } +void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, + const MachineInstr *CopyMI) { + SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + if (li_->hasInterval(DstReg)) { + LiveInterval &LI = li_->getInterval(DstReg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->getCopy() == CopyMI) + LR->valno->setCopy(0); + } + if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) + return; + for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + LiveInterval &LI = li_->getInterval(*AS); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->getCopy() == CopyMI) + LR->valno->setCopy(0); + } +} + /// PropagateDeadness - Propagate the dead marker to the instruction which /// defines the val#. static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, @@ -978,8 +965,8 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, // Live-in to the function but dead. Remove it from entry live-in set. if (mf_->begin()->isLiveIn(li.reg)) mf_->begin()->removeLiveIn(li.reg); - const LiveRange *LR = li.getLiveRangeContaining(CopyIdx); - removeRange(li, LR->start, LR->end, li_, tri_); + if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx)) + removeRange(li, LR->start, LR->end, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } @@ -1017,147 +1004,12 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, // val#, then propagate the dead marker. PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); ++numDeadValNo; - - if (LR->valno->isKill(RemoveEnd)) - LR->valno->removeKill(RemoveEnd); } removeRange(li, RemoveStart, RemoveEnd, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } -/// CanCoalesceWithImpDef - Returns true if the specified copy instruction -/// from an implicit def to another register can be coalesced away. -bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, - LiveInterval &li, - LiveInterval &ImpLi) const{ - if (!CopyMI->killsRegister(ImpLi.reg)) - return false; - // Make sure this is the only use. 
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg), - UE = mri_->use_end(); UI != UE;) { - MachineInstr *UseMI = &*UI; - ++UI; - if (CopyMI == UseMI || JoinedCopies.count(UseMI)) - continue; - return false; - } - return true; -} - - -/// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a -/// a virtual destination register with physical source register. -bool -SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, - LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use desity, - // do not join them, instead mark the physical register as its allocation - // preference. - const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(DstInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(DstInt.reg), - mri_->use_nodbg_end()) * Threshold < Length) - return false; - - // If the virtual register live interval extends into a loop, turn down - // aggressiveness. - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - const MachineLoop *L = loopInfo->getLoopFor(CopyMBB); - if (!L) { - // Let's see if the virtual register live interval extends into the loop. - LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx); - assert(DLR != DstInt.end() && "Live range not found!"); - DLR = DstInt.FindLiveRangeContaining(DLR->end.getNextSlot()); - if (DLR != DstInt.end()) { - CopyMBB = li_->getMBBFromIndex(DLR->start); - L = loopInfo->getLoopFor(CopyMBB); - } - } - - if (!L || Length <= Threshold) - return true; - - SlotIndex UseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); - MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); - if (loopInfo->getLoopFor(SMBB) != L) { - if (!loopInfo->isLoopHeader(CopyMBB)) - return false; - // If vr's live interval extends pass the loop header, do not join. - for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(), - SE = CopyMBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB == CopyMBB) - continue; - if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB), - li_->getMBBEndIdx(SuccMBB))) - return false; - } - } - return true; -} - -/// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a -/// copy from a virtual source register to a physical destination register. -bool -SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, - LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use density, - // do not join them, instead mark the physical register as its allocation - // preference. - const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(SrcInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(SrcInt.reg), - mri_->use_nodbg_end()) * Threshold < Length) - return false; - - if (SrcInt.empty()) - // Must be implicit_def. - return false; - - // If the virtual register live interval is defined or cross a loop, turn - // down aggressiveness. 
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
- SlotIndex UseIdx = CopyIdx.getUseIndex();
- LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
- assert(SLR != SrcInt.end() && "Live range not found!");
- SLR = SrcInt.FindLiveRangeContaining(SLR->start.getPrevSlot());
- if (SLR == SrcInt.end())
- return true;
- MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
- const MachineLoop *L = loopInfo->getLoopFor(SMBB);
-
- if (!L || Length <= Threshold)
- return true;
-
- if (loopInfo->getLoopFor(CopyMBB) != L) {
- if (SMBB != L->getLoopLatch())
- return false;
- // If vr's live interval is extended from before the loop latch, do not
- // join.
- for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
- PE = SMBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *PredMBB = *PI;
- if (PredMBB == SMBB)
- continue;
- if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
- li_->getMBBEndIdx(PredMBB)))
- return false;
- }
- }
- return true;
-}

 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
 /// two virtual registers from different register classes.
@@ -1203,157 +1055,6 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
   return true;
 }

-/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
-/// register with a physical register, check if any of the virtual register
-/// operands is a sub-register use or def. If so, make sure it won't result
-/// in an illegal extract_subreg or insert_subreg instruction. e.g.
-/// vr1024 = extract_subreg vr1025, 1
-/// ...
-/// vr1024 = mov8rr AH
-/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
-/// AH does not have a super-reg whose sub-register 1 is AH.
-bool
-SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
- unsigned VirtReg,
- unsigned PhysReg) {
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- if (O.isDebug())
- continue;
- MachineInstr *MI = &*I;
- if (MI == CopyMI || JoinedCopies.count(MI))
- continue;
- unsigned SubIdx = O.getSubReg();
- if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
- return true;
- if (MI->isExtractSubreg()) {
- SubIdx = MI->getOperand(2).getImm();
- if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
- return true;
- if (O.isDef()) {
- unsigned SrcReg = MI->getOperand(1).getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isPhysicalRegister(SrcReg)
- ? tri_->getPhysicalRegisterRegClass(SrcReg)
- : mri_->getRegClass(SrcReg);
- if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
- return true;
- }
- }
- if (MI->isInsertSubreg() || MI->isSubregToReg()) {
- SubIdx = MI->getOperand(3).getImm();
- if (VirtReg == MI->getOperand(0).getReg()) {
- if (!tri_->getSubReg(PhysReg, SubIdx))
- return true;
- } else {
- unsigned DstReg = MI->getOperand(0).getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isPhysicalRegister(DstReg)
- ? tri_->getPhysicalRegisterRegClass(DstReg)
- : mri_->getRegClass(DstReg);
- if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
- return true;
- }
- }
- }
- return false;
-}
-
-
-/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
-/// an extract_subreg where dst is a physical register, e.g. 
-/// cl = EXTRACT_SUBREG reg1024, 1
-bool
-SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
- unsigned SrcReg, unsigned SubIdx,
- unsigned &RealDstReg) {
- const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
- RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
- if (!RealDstReg) {
- DEBUG(dbgs() << "\tIncompatible source regclass: "
- << "none of the super-registers of " << tri_->getName(DstReg)
- << " are in " << RC->getName() << ".\n");
- return false;
- }
-
- LiveInterval &RHS = li_->getInterval(SrcReg);
- // For this type of EXTRACT_SUBREG, conservatively
- // check if the live interval of the source register interferes with the
- // actual super physical register we are trying to coalesce with.
- if (li_->hasInterval(RealDstReg) &&
- RHS.overlaps(li_->getInterval(RealDstReg))) {
- DEBUG({
- dbgs() << "\t\tInterfere with register ";
- li_->getInterval(RealDstReg).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
- // Do not check DstReg or its sub-register. JoinIntervals() will take care
- // of that.
- if (*SR != DstReg &&
- !tri_->isSubRegister(DstReg, *SR) &&
- li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\t\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- return true;
-}
-
-/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
-/// an insert_subreg where src is a physical register, e.g.
-/// reg1024 = INSERT_SUBREG reg1024, c1, 0
-bool
-SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
- unsigned SrcReg, unsigned SubIdx,
- unsigned &RealSrcReg) {
- const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
- RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
- if (!RealSrcReg) {
- DEBUG(dbgs() << "\tIncompatible destination regclass: "
- << "none of the super-registers of " << tri_->getName(SrcReg)
- << " are in " << RC->getName() << ".\n");
- return false;
- }
-
- LiveInterval &LHS = li_->getInterval(DstReg);
- if (li_->hasInterval(RealSrcReg) &&
- LHS.overlaps(li_->getInterval(RealSrcReg))) {
- DEBUG({
- dbgs() << "\t\tInterfere with register ";
- li_->getInterval(RealSrcReg).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
- // Do not check SrcReg or its sub-register. JoinIntervals() will take care
- // of that.
- if (*SR != SrcReg &&
- !tri_->isSubRegister(SrcReg, *SR) &&
- li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\t\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- return true;
-}
-
-/// getRegAllocPreference - Return the register allocation preference register.
-///
-static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF,
- MachineRegisterInfo *MRI,
- const TargetRegisterInfo *TRI) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return 0;
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
- return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF);
-}

 /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
 /// which are the src/dst of the copy instruction CopyMI. 
This returns true @@ -1369,354 +1070,97 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; - bool isExtSubReg = CopyMI->isExtractSubreg(); - bool isInsSubReg = CopyMI->isInsertSubreg(); - bool isSubRegToReg = CopyMI->isSubregToReg(); - unsigned SubIdx = 0; - if (isExtSubReg) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubIdx = CopyMI->getOperand(0).getSubReg(); - SrcReg = CopyMI->getOperand(1).getReg(); - SrcSubIdx = CopyMI->getOperand(2).getImm(); - } else if (isInsSubReg || isSubRegToReg) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubIdx = CopyMI->getOperand(3).getImm(); - SrcReg = CopyMI->getOperand(2).getReg(); - SrcSubIdx = CopyMI->getOperand(2).getSubReg(); - if (SrcSubIdx && SrcSubIdx != DstSubIdx) { - // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already " - "coalesced to another register.\n"); - return false; // Not coalescable. - } - } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) { - // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill> - Again = true; - return false; // Not coalescable. - } - } else { - llvm_unreachable("Unrecognized copy instruction!"); + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; } // If they are already joined we continue. - if (SrcReg == DstReg) { + if (CP.getSrcReg() == CP.getDstReg()) { DEBUG(dbgs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } - bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - - // If they are both physical registers, we cannot join them. - if (SrcIsPhys && DstIsPhys) { - DEBUG(dbgs() << "\tCan not coalesce physregs.\n"); - return false; // Not coalescable. - } - - // We only join virtual registers with allocatable physical registers. - if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n"); - return false; // Not coalescable. - } - if (DstIsPhys && !allocatableRegs_[DstReg]) { - DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // We cannot handle dual subreg indices and mismatched classes at the same - // time. - if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) { - DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n"); - return false; - } + DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); - // Check that a physical source register is compatible with dst regclass - if (SrcIsPhys) { - unsigned SrcSubReg = SrcSubIdx ? - tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg; - const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); - const TargetRegisterClass *DstSubRC = DstRC; - if (DstSubIdx) - DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); - assert(DstSubRC && "Illegal subregister index"); - if (!DstSubRC->contains(SrcSubReg)) { - DEBUG(dbgs() << "\tIncompatible destination regclass: " - << "none of the super-registers of " - << tri_->getName(SrcSubReg) << " are in " - << DstSubRC->getName() << ".\n"); - return false; // Not coalescable. 
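// [Editorial sketch, not part of the patch] The CoalescerPair protocol that
// replaces the hand-rolled operand decoding above, gathered in one place.
// Method names are the ones this change uses; register numbers are
// hypothetical.
//
//   CoalescerPair CP(*tii_, *tri_);
//   if (!CP.setRegisters(CopyMI))  // decode src/dst/subidx; reject non-copies
//     return false;
//   if (CP.isPhys()) {
//     // A physical register, when present, appears to be canonicalized into
//     // CP.getDstReg(): %EAX = COPY %reg1024 and %reg1024 = COPY %EAX both
//     // yield Dst=%EAX, Src=%reg1024, the latter with CP.isFlipped() set.
//   } else if (CP.isCrossClass()) {
//     // CP.getNewRC() is the class the joined register must satisfy.
//   }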
- } - } - - // Check that a physical dst register is compatible with source regclass - if (DstIsPhys) { - unsigned DstSubReg = DstSubIdx ? - tri_->getSubReg(DstReg, DstSubIdx) : DstReg; - const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg); - const TargetRegisterClass *SrcSubRC = SrcRC; - if (SrcSubIdx) - SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); - assert(SrcSubRC && "Illegal subregister index"); - if (!SrcSubRC->contains(DstSubReg)) { - DEBUG(dbgs() << "\tIncompatible source regclass: " - << "none of the super-registers of " - << tri_->getName(DstSubReg) << " are in " - << SrcSubRC->getName() << ".\n"); - (void)DstSubReg; - return false; // Not coalescable. + // Enforce policies. + if (CP.isPhys()) { + DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); + // Only coalesce to allocatable physreg. + if (!allocatableRegs_[CP.getDstReg()]) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. } - } - - // Should be non-null only when coalescing to a sub-register class. - bool CrossRC = false; - const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); - const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); - const TargetRegisterClass *NewRC = NULL; - unsigned RealDstReg = 0; - unsigned RealSrcReg = 0; - if (isExtSubReg || isInsSubReg || isSubRegToReg) { - SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm(); - if (SrcIsPhys && isExtSubReg) { - // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be - // coalesced with AX. - unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg(); - if (DstSubIdx) { - // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - if (DstSubIdx != SubIdx) { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } else - SrcReg = tri_->getSubReg(SrcReg, SubIdx); - SubIdx = 0; - } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) { - // EAX = INSERT_SUBREG EAX, r1024, 0 - unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg(); - if (SrcSubIdx) { - // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - if (SrcSubIdx != SubIdx) { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } else - DstReg = tri_->getSubReg(DstReg, SubIdx); - SubIdx = 0; - } else if ((DstIsPhys && isExtSubReg) || - (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { - if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg" - << " of a super-class.\n"); - return false; // Not coalescable. - } - - // FIXME: The following checks are somewhat conservative. Perhaps a better - // way to implement this is to treat this as coalescing a vr with the - // super physical register. - if (isExtSubReg) { - if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg)) - return false; // Not coalescable - } else { - if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg)) - return false; // Not coalescable - } - SubIdx = 0; - } else { - unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg() - : CopyMI->getOperand(2).getSubReg(); - if (OldSubIdx) { - if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg)) - // r1024<2> = EXTRACT_SUBREG r1025, 2. 
Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- // Also check if the other larger register is of the same register
- // class as the would-be resulting register.
- SubIdx = 0;
- else {
- DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
- return false; // Not coalescable.
- }
- }
- if (SubIdx) {
- if (!DstIsPhys && !SrcIsPhys) {
- if (isInsSubReg || isSubRegToReg) {
- NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx);
- } else { // extract_subreg
- NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx);
- }
- if (!NewRC) {
- DEBUG(dbgs() << "\t Conflicting sub-register indices.\n");
- return false; // Not coalescable
- }
+ } else {
+ DEBUG({
+ dbgs() << " with %reg" << CP.getDstReg();
+ if (CP.getSubIdx())
+ dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx());
+ dbgs() << " to " << CP.getNewRC()->getName() << "\n";
+ });

- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
- DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
- << SrcRC->getName() << "/"
- << DstRC->getName() << " -> "
- << NewRC->getName() << ".\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- }
- }
- } else if (differingRegisterClasses(SrcReg, DstReg)) {
- if (DisableCrossClassJoin)
- return false;
- CrossRC = true;
-
- // FIXME: What if the result of a EXTRACT_SUBREG is then coalesced
- // with another? If it's the resulting destination register, then
- // the subidx must be propagated to uses (but only those defined
- // by the EXTRACT_SUBREG). If it's being coalesced into another
- // register, it should be safe because the register is assumed to have
- // the register class of the super-register.

- // Process moves where one of the registers has a sub-register index.
- MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
- MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
- SubIdx = DstMO->getSubReg();
- if (SubIdx) {
- if (SrcMO->getSubReg())
- // FIXME: can we handle this?
+ // Avoid constraining virtual register regclass too much.
+ if (CP.isCrossClass()) {
+ if (DisableCrossClassJoin) {
+ DEBUG(dbgs() << "\tCross-class joins disabled.\n");
 return false;
- // This is not an insert_subreg but it looks like one.
- // e.g. %reg1024:4 = MOV32rr %EAX
- isInsSubReg = true;
- if (SrcIsPhys) {
- if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
- return false; // Not coalescable
- SubIdx = 0;
- }
- } else {
- SubIdx = SrcMO->getSubReg();
- if (SubIdx) {
- // This is not an extract_subreg but it looks like one.
- // e.g. %cl = MOV16rr %reg1024:1
- isExtSubReg = true;
- if (DstIsPhys) {
- if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
- return false; // Not coalescable
- SubIdx = 0;
- }
- }
- }

- // Now determine the register class of the joined register.
- if (!SrcIsPhys && !DstIsPhys) {
- if (isExtSubReg) {
- NewRC =
- SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
- } else if (isInsSubReg) {
- NewRC =
- SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
- } else {
- NewRC = getCommonSubClass(SrcRC, DstRC);
- }

- if (!NewRC) {
- DEBUG(dbgs() << "\tDisjoint regclasses: "
- << SrcRC->getName() << ", "
- << DstRC->getName() << ".\n");
- return false; // Not coalescable.
- }

- // If we are joining two virtual registers and the resulting register
- // class is more restrictive (fewer registers, smaller size), check if it's
- // worth doing the merge. 
- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - // Allow the coalescer to try again in case either side gets coalesced to - // a physical register that's compatible with the other side. e.g. - // r1024 = MOV32to32_ r1025 - // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. + << CP.getNewRC()->getName() << ".\n"); Again = true; // May be possible to coalesce later. return false; } } - } - - // Will it create illegal extract_subreg / insert_subreg? - if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg)) - return false; - if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg)) - return false; - - LiveInterval &SrcInt = li_->getInterval(SrcReg); - LiveInterval &DstInt = li_->getInterval(DstReg); - assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg && - "Register mapping is horribly broken!"); - DEBUG({ - dbgs() << "\t\tInspecting "; - if (SrcRC) dbgs() << SrcRC->getName() << ": "; - SrcInt.print(dbgs(), tri_); - dbgs() << "\n\t\t and "; - if (DstRC) dbgs() << DstRC->getName() << ": "; - DstInt.print(dbgs(), tri_); - dbgs() << "\n"; - }); + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // We need to be careful about coalescing a source physical register with a + // virtual register. Once the coalescing is done, it cannot be broken and + // these are not spillable! If the destination interval uses are far away, + // think twice about coalescing them! + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial() && CP.isPhys()) { + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } - // Save a copy of the virtual register live interval. We'll manually - // merge this into the "real" physical register live interval this is - // coalesced with. - OwningPtr<LiveInterval> SavedLI; - if (RealDstReg) - SavedLI.reset(li_->dupInterval(&SrcInt)); - else if (RealSrcReg) - SavedLI.reset(li_->dupInterval(&DstInt)); - - if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) { - // Check if it is necessary to propagate "isDead" property. - MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false); - bool isDead = mopd->isDead(); - - // We need to be careful about coalescing a source physical register with a - // virtual register. Once the coalescing is done, it cannot be broken and - // these are not spillable! If the destination interval uses are far away, - // think twice about coalescing them! 
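// [Editorial note] A worked instance of the use-density test applied just
// below, with assumed numbers: for a class with 8 allocatable registers the
// threshold is 8 * 2 = 16; a virtual interval spanning roughly 200
// instructions with 10 non-debug uses fails it, since 200 > 16 and
// 10 * 16 = 160 < 200. The join is then abandoned and the physreg is merely
// recorded as an allocation hint.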
- if (!isDead && (SrcIsPhys || DstIsPhys)) { - // If the virtual register live interval is long but it has low use - // density, do not join them, instead mark the physical register as its - // allocation preference. - LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; - LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt; - unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; - unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (JoinPInt.ranges.size() > 1000) { - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold && + std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), + mri_->use_nodbg_end()) * Threshold < Length) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) + return true; - const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(JoinVReg), - mri_->use_nodbg_end()) * Threshold < Length) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) - return true; - - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } + mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + Again = true; // May be possible to coalesce later. + return false; } } @@ -1724,32 +1168,24 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have // been modified, so we can use this information below to update aliases. - bool Swapped = false; - // If SrcInt is implicitly defined, it's safe to coalesce. - if (SrcInt.empty()) { - if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) { - // Only coalesce an empty interval (defined by implicit_def) with - // another interval which has a valno defined by the CopyMI and the CopyMI - // is a kill of the implicit def. - DEBUG(dbgs() << "\tNot profitable!\n"); - return false; - } - } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { + if (!JoinIntervals(CP)) { // Coalescing failed. // If definition of source is defined by trivial computation, try // rematerializing it. - if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), + CP.getDstReg(), 0, CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. 
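// [Editorial illustration, hypothetical registers] The shape of the copy
// elimination attempted by RemoveCopyByCommutingDef below: the copied value
// is produced by a commutable instruction whose other operand carries the
// copy's destination value, so commuting the definition lets it define the
// destination directly and the copy degenerates into an identity copy:
//
//   %reg1025 = ADD %reg1024, %reg1026<kill>   ; commutable def
//   ...
//   %reg1024 = COPY %reg1025                  ; copy we could not join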
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
- (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
- RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
- JoinedCopies.insert(CopyMI);
- DEBUG(dbgs() << "\tTrivial!\n");
- return true;
+ if (!CP.isPartial()) {
+ if (AdjustCopiesBackFrom(CP, CopyMI) ||
+ RemoveCopyByCommutingDef(CP, CopyMI)) {
+ JoinedCopies.insert(CopyMI);
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
 }

 // Otherwise, we are unable to join the intervals.
@@ -1758,86 +1194,32 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
 return false;
 }

- LiveInterval *ResSrcInt = &SrcInt;
- LiveInterval *ResDstInt = &DstInt;
- if (Swapped) {
- std::swap(SrcReg, DstReg);
- std::swap(ResSrcInt, ResDstInt);
- }
- assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- "LiveInterval::join didn't work right!");
-
- // If we're about to merge live ranges into a physical register live interval,
- // we have to update any aliased register's live ranges to indicate that they
- // have clobbered values for this range.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- // If this is an extract_subreg where dst is a physical register, e.g.
- // cl = EXTRACT_SUBREG reg1024, 1
- // then create and update the actual physical register allocated to RHS.
- if (RealDstReg || RealSrcReg) {
- LiveInterval &RealInt =
- li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
- for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
- E = SavedLI->vni_end(); I != E; ++I) {
- const VNInfo *ValNo = *I;
- VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(),
- false, // updated at *
- li_->getVNInfoAllocator());
- NewValNo->setFlags(ValNo->getFlags()); // * updated here.
- RealInt.addKills(NewValNo, ValNo->kills);
- RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
- }
- RealInt.weight += SavedLI->weight;
- DstReg = RealDstReg ? RealDstReg : RealSrcReg;
- }
-
- // Update the liveintervals of sub-registers.
- for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
- li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, *ResSrcInt,
- li_->getVNInfoAllocator());
- }
-
- // If this is an EXTRACT_SUBREG, make sure the result of coalescing is the
- // larger super-register.
- if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
- !SrcIsPhys && !DstIsPhys) {
- if ((isExtSubReg && !Swapped) ||
- ((isInsSubReg || isSubRegToReg) && Swapped)) {
- ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator());
- std::swap(SrcReg, DstReg);
- std::swap(ResSrcInt, ResDstInt);
- }
- }
-
 // Coalescing to a virtual register that is of a sub-register class of the
 // other. Make sure the resulting register is set to the right register class.
- if (CrossRC)
+ if (CP.isCrossClass()) {
 ++numCrossRCs;
-
- // This may happen even if it's cross-rc coalescing. e.g.
- // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4
- // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to
- // be allocated a register from GR64_ABCD.
- if (NewRC)
- mri_->setRegClass(DstReg, NewRC);
+ mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }

 // Remember to delete the copy instruction.
 JoinedCopies.insert(CopyMI);

- UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+ UpdateRegDefsUses(CP);

 // If we have extended the live range of a physical register, make sure we
 // update live-in lists as well. 
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- const LiveInterval &VRegInterval = li_->getInterval(SrcReg);
+ if (CP.isPhys()) {
 SmallVector<MachineBasicBlock*, 16> BlockSeq;
- for (LiveInterval::const_iterator I = VRegInterval.begin(),
- E = VRegInterval.end(); I != E; ++I ) {
+ // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+ // ranges for this, and they are preserved.
+ LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+ for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+ I != E; ++I ) {
 li_->findLiveInMBBs(I->start, I->end, BlockSeq);
 for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
 MachineBasicBlock &block = *BlockSeq[idx];
- if (!block.isLiveIn(DstReg))
- block.addLiveIn(DstReg);
+ if (!block.isLiveIn(CP.getDstReg()))
+ block.addLiveIn(CP.getDstReg());
 }
 BlockSeq.clear();
 }
@@ -1845,32 +1227,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {

 // SrcReg is guaranteed to be the register whose live interval is
 // being merged.
- li_->removeInterval(SrcReg);
+ li_->removeInterval(CP.getSrcReg());

 // Update regalloc hint.
- tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_);
-
- // Manually delete the live interval copy.
- if (SavedLI) {
- SavedLI->clear();
- SavedLI.reset();
- }
-
- // If resulting interval has a preference that no longer fits because of subreg
- // coalescing, just clear the preference.
- unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_);
- if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
- TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
- const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
- if (!RC->contains(Preference))
- mri_->setRegAllocationHint(ResDstInt->reg, 0, 0);
- }
+ tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);

 DEBUG({
- dbgs() << "\t\tJoined. Result = ";
- ResDstInt->print(dbgs(), tri_);
- dbgs() << "\n";
- });
+ LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+ dbgs() << "\tJoined. Result = ";
+ DstInt.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });

 ++numJoins;
 return true;
@@ -1927,263 +1294,53 @@ static unsigned ComputeUltimateVN(VNInfo *VNI,
 return ThisValNoAssignments[VN] = UltimateVN;
 }

-static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
- return std::find(V.begin(), V.end(), Val) != V.end();
-}
-
-static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
- ;
- else if (MI->isExtractSubreg()) {
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
- } else if (MI->isSubregToReg() ||
- MI->isInsertSubreg()) {
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(2).getReg();
- } else
- return false;
- return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) &&
- (DstReg == DR || TRI->isSuperRegister(DR, DstReg));
-}
-
-/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
-/// the specified live interval is defined by a copy from the specified
-/// register.
-bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
- LiveRange *LR,
- unsigned Reg) {
- unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
- if (SrcReg == Reg)
- return true;
- // FIXME: Do isPHIDef and isDefAccurate both need to be tested? 
- if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) &&
- TargetRegisterInfo::isPhysicalRegister(li.reg) &&
- *tri_->getSuperRegisters(li.reg)) {
- // It's a sub-register live interval, we may not have precise information.
- // Re-compute it.
- MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
- if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) {
- // Cache computed info.
- LR->valno->def = LR->start;
- LR->valno->setCopy(DefMI);
- return true;
- }
- }
- return false;
-}
-
-
-/// ValueLiveAt - Return true if the LiveRange pointed to by the given
-/// iterator, or any subsequent range with the same value number,
-/// is live at the given point.
-bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr,
- LiveInterval::iterator LREnd,
- SlotIndex defPoint) const {
- for (const VNInfo *valno = LRItr->valno;
- (LRItr != LREnd) && (LRItr->valno == valno); ++LRItr) {
- if (LRItr->contains(defPoint))
- return true;
- }
-
- return false;
-}
-
-
-/// SimpleJoin - Attempt to join the specified interval into this one. The
-/// caller of this method must guarantee that the RHS only contains a single
-/// value number and that the RHS is not defined by a copy from this
-/// interval. This returns false if the intervals are not joinable, or it
-/// joins them and returns true.
-bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS) {
- assert(RHS.containsOneValue());
-
- // Some (potentially more than one) of the value numbers in the current
- // interval may be defined as copies from the RHS. Scan the overlapping
- // portions of the LHS and RHS, keeping track of this and looking for
- // overlapping live ranges that are NOT defined as copies. If these exist, we
- // cannot coalesce.
-
- LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
- LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
-
- if (LHSIt->start < RHSIt->start) {
- LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
- if (LHSIt != LHS.begin()) --LHSIt;
- } else if (RHSIt->start < LHSIt->start) {
- RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
- if (RHSIt != RHS.begin()) --RHSIt;
- }
-
- SmallVector<VNInfo*, 8> EliminatedLHSVals;
-
- while (1) {
- // Determine if these live intervals overlap.
- bool Overlaps = false;
- if (LHSIt->start <= RHSIt->start)
- Overlaps = LHSIt->end > RHSIt->start;
- else
- Overlaps = RHSIt->end > LHSIt->start;
-
- // If the live intervals overlap, there are two interesting cases: if the
- // LHS interval is defined by a copy from the RHS, it's ok and we record
- // that the LHS value # is the same as the RHS. If it's not, then we cannot
- // coalesce these live ranges and we bail out.
- if (Overlaps) {
- // If we haven't already recorded that this value # is safe, check it.
- if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (LHSIt->valno->hasRedefByEC())
- return false;
- // Copy from the RHS?
- if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
- return false; // Nope, bail out.
-
- if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
- // Here is an interesting situation:
- // BB1:
- // vr1025 = copy vr1024
- // ..
- // BB2:
- // vr1024 = op
- // = vr1025
- // Even though vr1025 is copied from vr1024, it's not safe to
- // coalesce them since the live range of vr1025 intersects the
- // def of vr1024. 
This happens because vr1025 is assigned the
- // value of the previous iteration of vr1024.
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
+ LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // If a live interval is a physical register, check for interference with any
+ // aliases. The interference check implemented here is a bit more conservative
+ // than the full interference check below. We allow overlapping live ranges
+ // only when one is a copy of the other.
+ if (CP.isPhys()) {
+ for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
+ continue;
+ const LiveInterval &LHS = li_->getInterval(*AS);
+ LiveInterval::const_iterator LI = LHS.begin();
+ for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+ RI != RE; ++RI) {
+ LI = std::lower_bound(LI, LHS.end(), RI->start);
+ // Does LHS have an overlapping live range starting before RI?
+ if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+ (RI->start != RI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+ });
 return false;
- EliminatedLHSVals.push_back(LHSIt->valno);
- }
-
- // We know this entire LHS live range is okay, so skip it now.
- if (++LHSIt == LHSEnd) break;
- continue;
- }
+ }

- if (LHSIt->end < RHSIt->end) {
- if (++LHSIt == LHSEnd) break;
- } else {
- // One interesting case to check here. It's possible that we have
- // something like "X3 = Y" which defines a new value number in the LHS,
- // and is the last use of this liverange of the RHS. In this case, we
- // want to notice this copy (so that it gets coalesced away) even though
- // the live ranges don't actually overlap.
- if (LHSIt->start == RHSIt->end) {
- if (InVector(LHSIt->valno, EliminatedLHSVals)) {
- // We already know that this value number is going to be merged in
- // if coalescing succeeds. Just skip the liverange.
- if (++LHSIt == LHSEnd) break;
- } else {
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (LHSIt->valno->hasRedefByEC())
+ // Check that LHS ranges beginning in this range are copies.
+ for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+ if (LI->start != LI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+ });
 return false;
- // Otherwise, if this is a copy from the RHS, mark it as being merged
- // in.
- if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
- if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
- // Here is an interesting situation:
- // BB1:
- // vr1025 = copy vr1024
- // ..
- // BB2:
- // vr1024 = op
- // = vr1025
- // Even though vr1025 is copied from vr1024, it's not safe to
- // coalesce them since the live range of vr1025 intersects the
- // def of vr1024. This happens because vr1025 is assigned the
- // value of the previous iteration of vr1024.
- return false;
- EliminatedLHSVals.push_back(LHSIt->valno);
-
- // We know this entire LHS live range is okay, so skip it now. 
- if (++LHSIt == LHSEnd) break; } } } - - if (++RHSIt == RHSEnd) break; - } - } - - // If we got here, we know that the coalescing will be successful and that - // the value numbers in EliminatedLHSVals will all be merged together. Since - // the most common case is that EliminatedLHSVals has a single number, we - // optimize for it: if there is more than one value, we merge them all into - // the lowest numbered one, then handle the interval as if we were merging - // with one value number. - VNInfo *LHSValNo = NULL; - if (EliminatedLHSVals.size() > 1) { - // Loop through all the equal value numbers merging them into the smallest - // one. - VNInfo *Smallest = EliminatedLHSVals[0]; - for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) { - if (EliminatedLHSVals[i]->id < Smallest->id) { - // Merge the current notion of the smallest into the smaller one. - LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]); - Smallest = EliminatedLHSVals[i]; - } else { - // Merge into the smallest. - LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest); - } } - LHSValNo = Smallest; - } else if (EliminatedLHSVals.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && - *tri_->getSuperRegisters(LHS.reg)) - // Imprecise sub-register information. Can't handle it. - return false; - llvm_unreachable("No copies from the RHS?"); - } else { - LHSValNo = EliminatedLHSVals[0]; - } - - // Okay, now that there is a single LHS value number that we're merging the - // RHS into, update the value number info for the LHS to indicate that the - // value number is defined where the RHS value number was. - const VNInfo *VNI = RHS.getValNumInfo(0); - LHSValNo->def = VNI->def; - LHSValNo->setCopy(VNI->getCopy()); - - // Okay, the final step is to loop over the RHS live intervals, adding them to - // the LHS. - if (VNI->hasPHIKill()) - LHSValNo->setHasPHIKill(true); - LHS.addKills(LHSValNo, VNI->kills); - LHS.MergeRangesInAsValue(RHS, LHSValNo); - - LHS.ComputeJoinedWeight(RHS); - - // Update regalloc hint if both are virtual registers. - if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && - TargetRegisterInfo::isVirtualRegister(RHS.reg)) { - std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg); - std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg); - if (RHSPref != LHSPref) - mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second); } - // Update the liveintervals of sub-registers. - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) - for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS) - li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, LHS, - li_->getVNInfoAllocator()); - - return true; -} - -/// JoinIntervals - Attempt to join these two intervals. On failure, this -/// returns false. Otherwise, if one of the intervals being joined is a -/// physreg, this method always canonicalizes LHS to be it. The output -/// "RHS" will not have been modified, so we can use this information -/// below to update aliases. -bool -SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, - bool &Swapped) { // Compute the final value assignment, assuming that the live ranges can be // coalesced. 
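// [Editorial illustration, assumed register numbers] What the mapping loops
// in the new code below record: if the LHS interval (say %reg1025) contains
//
//   %reg1025<def> = COPY %reg1024   ; defines LHS value number #1
//
// and the RHS is %reg1024, LHSValsDefinedFromRHS maps LHS valno #1 to
// whichever RHS value number is live just before the def
// (def.getPrevSlot()). ComputeUltimateVN then folds each such pair into a
// single entry of NewVNInfo so that LiveInterval::join() can renumber both
// intervals consistently.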
SmallVector<int, 16> LHSValNoAssignments; @@ -2192,203 +1349,87 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; SmallVector<VNInfo*, 16> NewVNInfo; - // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && - *tri_->getSubRegisters(LHS.reg)) { - // If it's coalescing a virtual register to a physical register, estimate - // its live interval length. This is the *cost* of scanning an entire live - // interval. If the cost is low, we'll do an exhaustive check instead. - - // If this is something like this: - // BB1: - // v1024 = op - // ... - // BB2: - // ... - // RAX = v1024 - // - // That is, the live interval of v1024 crosses a bb. Then we can't rely on - // less conservative check. It's possible a sub-register is defined before - // v1024 (or live in) and live out of BB1. - if (RHS.containsOneValue() && - li_->intervalIsInOneMBB(RHS) && - li_->getApproximateInstructionCount(RHS) <= 10) { - // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies)) - return false; - } else { - for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; - } - } - } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) && - *tri_->getSubRegisters(RHS.reg)) { - if (LHS.containsOneValue() && - li_->getApproximateInstructionCount(LHS) <= 10) { - // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies)) - return false; - } else { - for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; - } - } - } + LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); + DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); - // Compute ultimate value numbers for the LHS and RHS values. - if (RHS.containsOneValue()) { - // Copies from a liveinterval with a single value are simple to handle and - // very common, handle the special case here. This is important, because - // often RHS is small and LHS is large (e.g. a physreg). - - // Find out if the RHS is defined as a copy from some value in the LHS. - int RHSVal0DefinedFromLHS = -1; - int RHSValID = -1; - VNInfo *RHSValNoInfo = NULL; - VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0); - unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0); - if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) { - // If RHS is not defined as a copy from the LHS, we can use simpler and - // faster checks to see if the live ranges are coalescable. This joiner - // can't swap the LHS/RHS intervals though. - if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - return SimpleJoin(LHS, RHS); - } else { - RHSValNoInfo = RHSValNoInfo0; - } - } else { - // It was defined as a copy from the LHS, find out what value # it is. 
- RHSValNoInfo = - LHS.getLiveRangeContaining(RHSValNoInfo0->def.getPrevSlot())->valno; - RHSValID = RHSValNoInfo->id; - RHSVal0DefinedFromLHS = RHSValID; - } + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + continue; - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.resize(LHS.getNumValNums(), NULL); - - // Okay, *all* of the values in LHS that are defined as a copy from RHS - // should now get updated. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) { - if (LHSSrcReg != RHS.reg) { - // If this is not a copy from the RHS, its value number will be - // unmodified by the coalescing. - NewVNInfo[VN] = VNI; - LHSValNoAssignments[VN] = VN; - } else if (RHSValID == -1) { - // Otherwise, it is a copy from the RHS, and we don't already have a - // value# for it. Keep the current value number, but remember it. - LHSValNoAssignments[VN] = RHSValID = VN; - NewVNInfo[VN] = RHSValNoInfo; - LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; - } else { - // Otherwise, use the specified value #. - LHSValNoAssignments[VN] = RHSValID; - if (VN == (unsigned)RHSValID) { // Else this val# is dead. - NewVNInfo[VN] = RHSValNoInfo; - LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; - } - } - } else { - NewVNInfo[VN] = VNI; - LHSValNoAssignments[VN] = VN; - } - } + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; - assert(RHSValID != -1 && "Didn't find value #?"); - RHSValNoAssignments[0] = RHSValID; - if (RHSVal0DefinedFromLHS != -1) { - // This path doesn't go through ComputeUltimateVN so just set - // it to anything. - RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1; - } - } else { - // Loop over the value numbers of the LHS, seeing if any are defined from - // the RHS. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? - continue; + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; - // DstReg is known to be a register in the LHS interval. If the src is - // from the RHS interval, we can use its value #. - if (li_->getVNInfoSourceReg(VNI) != RHS.reg) - continue; + // Figure out the value # from the RHS. + LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + LHSValsDefinedFromRHS[VNI] = lr->valno; + } - // Figure out the value # from the RHS. - LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - assert(lr && "Cannot find live range"); - LHSValsDefinedFromRHS[VNI] = lr->valno; - } + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + continue; - // Loop over the value numbers of the RHS, seeing if any are defined from - // the LHS. 
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? - continue; + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; - // DstReg is known to be a register in the RHS interval. If the src is - // from the LHS interval, we can use its value #. - if (li_->getVNInfoSourceReg(VNI) != LHS.reg) - continue; + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; - // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - assert(lr && "Cannot find live range"); - RHSValsDefinedFromLHS[VNI] = lr->valno; - } + // Figure out the value # from the LHS. + LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + RHSValsDefinedFromLHS[VNI] = lr->valno; + } - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - // If this value number isn't a copy from the LHS, it's a new number. - if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; - continue; - } - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); } // Armed with the mappings of LHS/RHS values to ultimate values, walk the @@ -2399,15 +1440,17 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, LiveInterval::const_iterator JE = RHS.end(); // Skip ahead until the first place of potential sharing. 
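// [Editorial note] The "I != IE && J != JE" guards added below appear to be
// needed because the LHS is now obtained with getOrCreateInterval() and may
// be empty (e.g. a physreg destination with no interval yet); the old loop
// assumed both intervals had at least one range. The -1 sentinels pushed
// into the assignment vectors further down guard the same case.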
- if (I->start < J->start) {
- I = std::upper_bound(I, IE, J->start);
- if (I != LHS.begin()) --I;
- } else if (J->start < I->start) {
- J = std::upper_bound(J, JE, I->start);
- if (J != RHS.begin()) --J;
+ if (I != IE && J != JE) {
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
 }

- while (1) {
+ while (I != IE && J != JE) {
 // Determine if these two live ranges overlap.
 bool Overlaps;
 if (I->start < J->start) {
@@ -2429,13 +1472,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
 return false;
 }

- if (I->end < J->end) {
+ if (I->end < J->end)
 ++I;
- if (I == IE) break;
- } else {
+ else
 ++J;
- if (J == JE) break;
- }
 }

 // Update kill info. Some live ranges are extended due to copy coalescing.
@@ -2443,10 +1483,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
 E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
 VNInfo *VNI = I->first;
 unsigned LHSValID = LHSValNoAssignments[VNI->id];
- NewVNInfo[LHSValID]->removeKill(VNI->def);
 if (VNI->hasPHIKill())
 NewVNInfo[LHSValID]->setHasPHIKill(true);
- RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
 }

 // Update kill info. Some live ranges are extended due to copy coalescing.
@@ -2454,25 +1492,19 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
 E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
 VNInfo *VNI = I->first;
 unsigned RHSValID = RHSValNoAssignments[VNI->id];
- NewVNInfo[RHSValID]->removeKill(VNI->def);
 if (VNI->hasPHIKill())
 NewVNInfo[RHSValID]->setHasPHIKill(true);
- LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
 }

+ if (LHSValNoAssignments.empty())
+ LHSValNoAssignments.push_back(-1);
+ if (RHSValNoAssignments.empty())
+ RHSValNoAssignments.push_back(-1);
+
 // If we get here, we know that we can coalesce the live ranges. Ask the
 // intervals to coalesce themselves now.
- if ((RHS.ranges.size() > LHS.ranges.size() &&
- TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
- TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
- RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo,
- mri_);
- Swapped = true;
- } else {
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
- mri_);
- Swapped = false;
- }
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);

 return true;
 }
@@ -2513,15 +1545,10 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,

 // If this isn't a copy nor an extract_subreg, we can't join intervals. 
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; bool isInsUndef = false; - if (Inst->isExtractSubreg()) { + if (Inst->isCopy()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->isInsertSubreg()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(2).getReg(); - if (Inst->getOperand(1).isUndef()) - isInsUndef = true; - } else if (Inst->isInsertSubreg() || Inst->isSubregToReg()) { + } else if (Inst->isSubregToReg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -2650,6 +1677,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, E = mri_->use_nodbg_end(); I != E; ++I) { MachineOperand &Use = I.getOperand(); MachineInstr *UseMI = Use.getParent(); + if (UseMI->isIdentityCopy()) + continue; unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == DstReg && SrcSubIdx == DstSubIdx) @@ -2680,7 +1709,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, // Ignore identity copies. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + if (!MI->isIdentityCopy() && + !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { MachineOperand &Use = MI->getOperand(i); @@ -2750,10 +1780,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Delete all coalesced copies. bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert((MI->isExtractSubreg() || MI->isInsertSubreg() || - MI->isSubregToReg()) && "Unrecognized copy instruction"); - DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) // Do not delete extract_subreg, insert_subreg of physical // registers unless the definition is dead. e.g. // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 @@ -2762,7 +1791,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DoDelete = false; } if (MI->allDefsAreDead()) { - LiveInterval &li = li_->getInterval(DstReg); + LiveInterval &li = li_->getInterval(SrcReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); DoDelete = true; @@ -2812,12 +1841,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // If the move will be an identity move delete it bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) { + if (MI->isIdentityCopy() || + (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) { if (li_->hasInterval(SrcReg)) { LiveInterval &RegInt = li_->getInterval(SrcReg); // If def of this move instruction is dead, remove its live range - // from the dstination register's live interval. - if (MI->registerDefIsDead(DstReg)) { + // from the destination register's live interval. 
+ if (MI->allDefsAreDead()) { if (!ShortenDeadCopySrcLiveRange(RegInt, MI)) ShortenDeadCopyLiveRange(RegInt, MI); } @@ -2832,17 +1862,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. if (li_->isNotInMIMap(MI)) continue; - SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex(); + SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; unsigned reg = MO.getReg(); if (!reg || !li_->hasInterval(reg)) continue; - LiveInterval &LI = li_->getInterval(reg); - const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); - if (!LR || - (!LR->valno->isKill(UseIdx.getDefIndex()) && - LR->valno->def != UseIdx.getDefIndex())) + if (!li_->getInterval(reg).killedAt(DefIdx)) MO.setIsKill(false); } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 1be04f3..e154da6 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -105,21 +105,12 @@ namespace llvm { /// possible to coalesce this interval, but it may be possible if other /// things get coalesced, then it returns true by reference in 'Again'. bool JoinCopy(CopyRec &TheCopy, bool &Again); - + /// JoinIntervals - Attempt to join these two intervals. On failure, this - /// returns false. Otherwise, if one of the intervals being joined is a - /// physreg, this method always canonicalizes DestInt to be it. The output - /// "SrcInt" will not have been modified, so we can use this information - /// below to update aliases. - bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped); - - /// SimpleJoin - Attempt to join the specified interval into this one. The - /// caller of this method must guarantee that the RHS only contains a single - /// value number and that the RHS is not defined by a copy from this - /// interval. This returns false if the intervals are not joinable, or it - /// joins them and returns true. - bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS); - + /// returns false. The output "SrcInt" will not have been modified, so we can + /// use this information below to update aliases. + bool JoinIntervals(CoalescerPair &CP); + /// Return true if the two specified registers belong to different register /// classes. The registers may be either phys or virt regs. bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; @@ -128,8 +119,7 @@ namespace llvm { /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single /// value number, eliminating a copy. - bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB, - MachineInstr *CopyMI); + bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); /// HasOtherReachingDefs - Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. @@ -140,8 +130,7 @@ namespace llvm { /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. 
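/// For example (an illustrative sketch; A/B number the values of the two
/// intervals):
///   A3 = op A2 B0<kill>
///     ...
///   B1 = A3         <- this copy
///     ...
///       = op A3     <- more uses
/// ==>
///   B2 = op B0 A2<kill>
///     ...
///   B1 = B2         <- now an identity copy
///     ...
///       = op B2     <- more uses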
- bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
- MachineInstr *CopyMI);
+ bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
/// TrimLiveIntervalToLastUse - If there is a last use in the same basic
/// block as the copy instruction, trim the live interval to the last use
@@ -155,28 +144,6 @@ namespace llvm {
bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
unsigned DstSubIdx, MachineInstr *CopyMI);
- /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
- /// from an implicit def to another register can be coalesced away.
- bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
- LiveInterval &li, LiveInterval &ImpLi) const;
-
- /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
- /// implicit_def and it is being removed. Turn all copies from this value#
- /// into implicit_defs.
- void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI);
-
- /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a
- /// a virtual destination register with physical source register.
- bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt, LiveInterval &SrcInt);
-
- /// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a
- /// copy from a virtual source register to a physical destination register.
- bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt, LiveInterval &SrcInt);
-
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
bool isWinToJoinCrossClass(unsigned SrcReg,
@@ -185,43 +152,12 @@ namespace llvm {
const TargetRegisterClass *DstRC,
const TargetRegisterClass *NewRC);
- /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
- /// register with a physical register, check if any of the virtual register
- /// operand is a sub-register use or def. If so, make sure it won't result
- /// in an illegal extract_subreg or insert_subreg instruction.
- bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
- unsigned VirtReg, unsigned PhysReg);
-
- /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
- /// an extract_subreg where dst is a physical register, e.g.
- /// cl = EXTRACT_SUBREG reg1024, 1
- bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
- unsigned SubIdx, unsigned &RealDstReg);
-
- /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
- /// an insert_subreg where src is a physical register, e.g.
- /// reg1024 = INSERT_SUBREG reg1024, c1, 0
- bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
- unsigned SubIdx, unsigned &RealDstReg);
-
- /// ValueLiveAt - Return true if the LiveRange pointed to by the given
- /// iterator, or any subsequent range with the same value number,
- /// is live at the given point.
- bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd,
- SlotIndex defPoint) const;
-
- /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
- /// the specified live interval is defined by a copy from the specified
- /// register.
- bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
- unsigned Reg);
-
/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero.
If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. - void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + void UpdateRegDefsUses(const CoalescerPair &CP); /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. /// Return true if live interval is removed. @@ -238,6 +174,10 @@ namespace llvm { /// it as well. bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); + /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the + /// VNInfo copy flag for DstReg and all aliases. + void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); + /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 059e8d6..e90869d 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -46,6 +46,8 @@ namespace { Constant *UnregisterFn; Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; Constant *SelectorFn; @@ -69,7 +71,7 @@ namespace { void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch); - void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); + void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); bool insertSjLjEHSupport(Function &F); }; } // end anonymous namespace @@ -107,6 +109,8 @@ bool SjLjEHPass::doInitialization(Module &M) { PointerType::getUnqual(FunctionContextTy), (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); @@ -175,8 +179,10 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { /// we spill into a stack location, guaranteeing that there is nothing live /// across the unwind edge. This process also splits all critical edges /// coming out of invoke's. +/// FIXME: Move this function to a common utility file (Local.cpp?) so +/// both SjLj and LowerInvoke can use it. void SjLjEHPass:: -splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { +splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; @@ -198,16 +204,33 @@ splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - // This is always a no-op cast because we're casting AI to AI->getType() so - // src and destination types are identical. BitCast is the only possibility. 
- CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Normally its is forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, we're - // replacing it here with the same value it was constructed with to simply - // make NC its user. - NC->setOperand(0, AI); + const Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } } // Finally, scan the code looking for instructions with bad live ranges. @@ -266,6 +289,9 @@ splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { } // If we decided we need a spill, do it. + // FIXME: Spilling this way is overkill, as it forces all uses of + // the value to be reloaded from the stack slot, even those that aren't + // in the unwind blocks. We should be more selective. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); @@ -294,22 +320,34 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // If we don't have any invokes or unwinds, there's nothing to do. if (Unwinds.empty() && Invokes.empty()) return false; - // Find the eh.selector.* and eh.exception calls. We'll use the first - // eh.selector to determine the right personality function to use. For - // SJLJ, we always use the same personality for the whole function, - // not on a per-selector basis. + // Find the eh.selector.*, eh.exception and alloca calls. + // + // Remember any allocas() that aren't in the entry block, as the + // jmpbuf saved SP will need to be updated for them. + // + // We'll use the first eh.selector to determine the right personality + // function to use. For SJLJ, we always use the same personality for the + // whole function, not on a per-selector basis. // FIXME: That's a bit ugly. Better way? SmallVector<CallInst*,16> EH_Selectors; SmallVector<CallInst*,16> EH_Exceptions; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + SmallVector<Instruction*,16> JmpbufUpdatePoints; + // Note: Skip the entry block since there's nothing there that interests + // us. eh.selector and eh.exception shouldn't ever be there, and we + // want to disregard any allocas that are there. 
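// (Note on the loop form below: the pre-increment in the for-condition is
// what skips the entry block -- BB is advanced past F.begin() before the
// body first runs. An equivalent, more explicit sketch, assuming F has at
// least one basic block:
//   for (Function::iterator BB = llvm::next(F.begin()), E = F.end();
//        BB != E; ++BB) { ... })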
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (CI->getCalledFunction() == SelectorFn) {
- if (!PersonalityFn) PersonalityFn = CI->getOperand(2);
+ if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
EH_Selectors.push_back(CI);
} else if (CI->getCalledFunction() == ExceptionFn) {
EH_Exceptions.push_back(CI);
+ } else if (CI->getCalledFunction() == StackRestoreFn) {
+ JmpbufUpdatePoints.push_back(CI);
}
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ JmpbufUpdatePoints.push_back(AI);
}
}
}
@@ -329,7 +367,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// we spill into a stack location, guaranteeing that there is nothing live
// across the unwind edge. This process also splits all critical edges
// coming out of invoke's.
- splitLiveRangesLiveAcrossInvokes(Invokes);
+ splitLiveRangesAcrossInvokes(Invokes);
BasicBlock *EntryBB = F.begin();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
@@ -419,7 +457,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// Populate the Function Context
// 1. LSDA address
// 2. Personality function address
- // 3. jmpbuf (save FP and call eh.sjlj.setjmp)
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
// LSDA address
Idxs[0] = Zero;
@@ -440,31 +478,41 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
EntryBB->getTerminator());
- // Save the frame pointer.
+ // Save the frame pointer.
Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *FieldPtr
+ Value *JBufPtr
= GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
"jbuf_gep",
EntryBB->getTerminator());
Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *ElemPtr =
- GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
EntryBB->getTerminator());
Value *Val = CallInst::Create(FrameAddrFn,
ConstantInt::get(Int32Ty, 0),
"fp",
EntryBB->getTerminator());
- new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator());
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, FieldPtr,
+ CastInst::Create(Instruction::BitCast, JBufPtr,
Type::getInt8PtrTy(F.getContext()), "",
EntryBB->getTerminator());
Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
"dispatch",
EntryBB->getTerminator());
- // check the return value of the setjmp. non-zero goes to dispatcher
+ // check the return value of the setjmp. non-zero goes to dispatcher.
Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
ICmpInst::ICMP_EQ, DispatchVal, Zero,
"notunwind");
@@ -509,6 +557,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Unwinds[i]->eraseFromParent();
}
+ // Following any allocas not in the entry block, update the saved SP
+ // in the jmpbuf to the new value.
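// (Why the update is needed, as a sketch: a dynamic alloca or a stackrestore
// moves SP after the entry-block setjmp, so a longjmp back into this frame
// would otherwise restore a stale SP and the unwind path could clobber that
// stack memory. In IR terms each recorded update point gets:
//   %sp = call i8* @llvm.stacksave()
//   store volatile i8* %sp, i8** %jbuf_sp_gep
// which is what the loop below emits via StackAddrFn and StackPtr.)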
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { + Instruction *AI = JmpbufUpdatePoints[i]; + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(AI); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned i = 0, e = Returns.size(); i != e; ++i) diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 6110ef5..7a227cf 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -213,9 +213,11 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { - os << getIndex(); + os << entry().getIndex(); if (isPHI()) os << "*"; + else + os << "LudS"[getSlot()]; } // Dump a SlotIndex to stderr. diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index a7b2efe..56bcb28 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -14,18 +14,20 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <set> using namespace llvm; namespace { - enum SpillerName { trivial, standard, splitting }; + enum SpillerName { trivial, standard, splitting, inline_ }; } static cl::opt<SpillerName> @@ -35,6 +37,7 @@ spillerOpt("spiller", cl::values(clEnumVal(trivial, "trivial spiller"), clEnumVal(standard, "default spiller"), clEnumVal(splitting, "splitting spiller"), + clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), cl::init(standard)); @@ -53,8 +56,8 @@ protected: const TargetInstrInfo *tii; const TargetRegisterInfo *tri; VirtRegMap *vrm; - - /// Construct a spiller base. + + /// Construct a spiller base. SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) : mf(mf), lis(lis), vrm(vrm) { @@ -67,7 +70,8 @@ protected: /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. - std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) { + void trivialSpillEverywhere(LiveInterval *li, + std::vector<LiveInterval*> &newIntervals) { DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -78,8 +82,6 @@ protected: DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); - std::vector<LiveInterval*> added; - const TargetRegisterClass *trc = mri->getRegClass(li->reg); unsigned ss = vrm->assignVirt2StackSlot(li->reg); @@ -96,7 +98,7 @@ protected: do { ++regItr; } while (regItr != mri->reg_end() && (&*regItr == mi)); - + // Collect uses & defs for this instr. SmallVector<unsigned, 2> indices; bool hasUse = false; @@ -116,7 +118,7 @@ protected: vrm->assignVirt2StackSlot(newVReg, ss); LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); newLI->weight = HUGE_VALF; - + // Update the reg operands & kill flags. 
for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; @@ -136,10 +138,10 @@ protected: MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, loadInstr); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator()); - loadVNI->addKill(endIndex); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } @@ -150,17 +152,15 @@ protected: MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, storeInstr); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator()); - storeVNI->addKill(storeIndex); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } - added.push_back(newLI); + newIntervals.push_back(newLI); } - - return added; } }; @@ -176,11 +176,12 @@ public: TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) : SpillerBase(mf, lis, vrm) {} - std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex*) { + void spill(LiveInterval *li, + std::vector<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &, + SlotIndex*) { // Ignore spillIs - we don't use it. - return trivialSpillEverywhere(li); + trivialSpillEverywhere(li, newIntervals); } }; @@ -200,10 +201,13 @@ public: : lis(lis), loopInfo(loopInfo), vrm(vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. - std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex*) { - return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); + void spill(LiveInterval *li, + std::vector<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex*) { + std::vector<LiveInterval*> added = + lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); + newIntervals.insert(newIntervals.end(), added.begin(), added.end()); } }; @@ -214,7 +218,7 @@ namespace { /// When a call to spill is placed this spiller will first try to break the /// interval up into its component values (one new interval per value). /// If this fails, or if a call is placed to spill a previously split interval -/// then the spiller falls back on the standard spilling mechanism. +/// then the spiller falls back on the standard spilling mechanism. 
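// (A minimal caller-side sketch of the spill interface these classes now
// implement, assuming a Spiller *spiller obtained from createSpiller();
// variable names are illustrative:
//   std::vector<LiveInterval*> NewIntervals;
//   SmallVector<LiveInterval*, 8> SpillIs; // intervals already being spilled
//   spiller->spill(LI, NewIntervals, SpillIs);
//   // NewIntervals now holds whatever intervals the strategy created.
// See the updated Spiller.h interface later in this patch.)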
class SplittingSpiller : public StandardSpiller {
public:
SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
@@ -226,22 +230,21 @@ public:
tri = mf->getTarget().getRegisterInfo();
}
- std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestStart) {
-
- if (worthTryingToSplit(li)) {
- return tryVNISplit(li, earliestStart);
- }
- // else
- return StandardSpiller::spill(li, spillIs, earliestStart);
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestStart) {
+ if (worthTryingToSplit(li))
+ tryVNISplit(li, earliestStart);
+ else
+ StandardSpiller::spill(li, newIntervals, spillIs, earliestStart);
}
private:
MachineRegisterInfo *mri;
const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
+ const TargetRegisterInfo *tri;
DenseSet<LiveInterval*> alreadySplit;
bool worthTryingToSplit(LiveInterval *li) const {
@@ -258,18 +261,18 @@ private:
SmallVector<VNInfo*, 4> vnis;
std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
-
+
for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
VNInfo *vni = *vniItr;
-
- // Skip unused VNIs, or VNIs with no kills.
- if (vni->isUnused() || vni->kills.empty())
+
+ // Skip unused VNIs.
+ if (vni->isUnused())
continue;
DEBUG(dbgs() << "  Extracted Val #" << vni->id << " as ");
LiveInterval *splitInterval = extractVNI(li, vni);
-
+
if (splitInterval != 0) {
DEBUG(dbgs() << *splitInterval << "\n");
added.push_back(splitInterval);
@@ -281,12 +284,12 @@ private:
} else {
DEBUG(dbgs() << "0\n");
}
- }
+ }
DEBUG(dbgs() << "Original LI: " << *li << "\n");
// If the original interval still contains some live ranges
- // add it to added and alreadySplit.
+ // add it to added and alreadySplit.
if (!li->empty()) {
added.push_back(li);
alreadySplit.insert(li);
@@ -302,16 +305,15 @@ private:
/// Extract the given value number from the interval.
LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
assert(vni->isDefAccurate() || vni->isPHIDef());
- assert(!vni->kills.empty());
- // Create a new vreg and live interval, copy VNI kills & ranges over.
+ // Create a new vreg and live interval, copy VNI ranges over.
const TargetRegisterClass *trc = mri->getRegClass(li->reg);
unsigned newVReg = mri->createVirtualRegister(trc);
vrm->grow();
LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
- // Start by copying all live ranges in the VN to the new interval.
+ // Start by copying all live ranges in the VN to the new interval.
for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
rItr != rEnd; ++rItr) {
if (rItr->valno == vni) {
@@ -319,7 +321,7 @@ private:
}
}
- // Erase the old VNI & ranges.
+ // Erase the old VNI & ranges.
li->removeValNo(vni);
// Collect all current uses of the register belonging to the given VNI.
@@ -336,15 +338,13 @@ private:
// Insert a copy at the start of the MBB. The range preceding the
// copy will be attached to the original LiveInterval.
MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); - tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = defMBB->begin(); - copyMI->addRegisterKilled(li->reg, tri); + MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), 0, false, lis->getVNInfoAllocator()); phiDefVNI->setIsPHIDef(true); - phiDefVNI->addKill(copyIdx.getDefIndex()); li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); LiveRange *oldPHIDefRange = newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); @@ -367,8 +367,8 @@ private: newVNI->setIsPHIDef(false); // not a PHI def anymore. newVNI->setIsDefAccurate(true); } else { - // non-PHI def. Rename the def. If it's two-addr that means renaming the use - // and inserting a new copy too. + // non-PHI def. Rename the def. If it's two-addr that means renaming the + // use and inserting a new copy too. MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); // We'll rename this now, so we can remove it from uses. uses.erase(defInst); @@ -384,38 +384,26 @@ private: twoAddrUseIsUndef = true; } } - + SlotIndex defIdx = lis->getInstructionIndex(defInst); newVNI->def = defIdx.getDefIndex(); if (isTwoAddr && !twoAddrUseIsUndef) { MachineBasicBlock *defMBB = defInst->getParent(); - tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst)); + MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - copyMI->addRegisterKilled(li->reg, tri); LiveRange *origUseRange = li->getLiveRangeContaining(newVNI->def.getUseIndex()); - VNInfo *origUseVNI = origUseRange->valno; origUseRange->end = copyIdx.getDefIndex(); - bool updatedKills = false; - for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) { - if (origUseVNI->kills[k] == defIdx.getDefIndex()) { - origUseVNI->kills[k] = copyIdx.getDefIndex(); - updatedKills = true; - break; - } - } - assert(updatedKills && "Failed to update VNI kill list."); VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, true, lis->getVNInfoAllocator()); - copyVNI->addKill(defIdx.getDefIndex()); LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); newLI->addRange(copyRange); - } + } } - + for (std::set<MachineInstr*>::iterator usesItr = uses.begin(), usesEnd = uses.end(); usesItr != usesEnd; ++usesItr) { @@ -435,7 +423,7 @@ private: // Check if this instr is two address. unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); - + // Rename uses (and defs for two-address instrs). for (unsigned i = 0; i < useInst->getNumOperands(); ++i) { MachineOperand &mo = useInst->getOperand(i); @@ -451,10 +439,9 @@ private: // reg. 
MachineBasicBlock *useMBB = useInst->getParent();
MachineBasicBlock::iterator useItr(useInst);
- tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = llvm::next(useItr);
- copyMI->addRegisterKilled(newVReg, tri);
+ MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
+ tii->get(TargetOpcode::COPY), newVReg)
+ .addReg(li->reg, RegState::Kill);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
// Change the old two-address defined range & vni to start at
@@ -470,56 +457,44 @@ private:
VNInfo *copyVNI = newLI->getNextValue(useIdx.getDefIndex(), 0, true,
lis->getVNInfoAllocator());
- copyVNI->addKill(copyIdx.getDefIndex());
LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
newLI->addRange(copyRange);
}
}
-
- // Iterate over any PHI kills - we'll need to insert new copies for them.
- for (VNInfo::KillSet::iterator
- killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end();
- killItr != killEnd; ++killItr) {
- SlotIndex killIdx(*killItr);
- if (killItr->isPHI()) {
- MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
- LiveRange *oldKillRange =
- newLI->getLiveRangeContaining(killIdx);
-
- assert(oldKillRange != 0 && "No kill range?");
-
- tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
- li->reg, newVReg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
- copyMI->addRegisterKilled(newVReg, tri);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- // Save the current end. We may need it to add a new range if the
- // current range runs of the end of the MBB.
- SlotIndex newKillRangeEnd = oldKillRange->end;
- oldKillRange->end = copyIdx.getDefIndex();
+ // Iterate over any PHI kills - we'll need to insert new copies for them.
+ for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
+ LRI != LRE; ++LRI) {
+ if (LRI->valno != newVNI || LRI->end.isPHI())
+ continue;
+ SlotIndex killIdx = LRI->end;
+ MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+ MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
+ DebugLoc(), tii->get(TargetOpcode::COPY),
+ li->reg)
+ .addReg(newVReg, RegState::Kill);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
- assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
- "PHI kill range doesn't reach kill-block end. Not sane.");
- newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
- newKillRangeEnd, newVNI));
- }
+ // Save the current end. We may need it to add a new range if the
+ // current range runs off the end of the MBB.
+ SlotIndex newKillRangeEnd = LRI->end;
+ LRI->end = copyIdx.getDefIndex();
- *killItr = oldKillRange->end;
- VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
- copyMI, true,
- lis->getVNInfoAllocator());
- newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB));
- newKillVNI->setHasPHIKill(true);
- li->addRange(LiveRange(copyIdx.getDefIndex(),
- lis->getMBBEndIdx(killMBB),
- newKillVNI));
+ if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
+ assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
+ "PHI kill range doesn't reach kill-block end.
Not sane."); + newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB), + newKillRangeEnd, newVNI)); } + VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), + copyMI, true, + lis->getVNInfoAllocator()); + newKillVNI->setHasPHIKill(true); + li->addRange(LiveRange(copyIdx.getDefIndex(), + lis->getMBBEndIdx(killMBB), + newKillVNI)); } - newVNI->setHasPHIKill(false); return newLI; @@ -530,6 +505,13 @@ private: } // end anonymous namespace +namespace llvm { +Spiller *createInlineSpiller(MachineFunction*, + LiveIntervals*, + const MachineLoopInfo*, + VirtRegMap*); +} + llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, const MachineLoopInfo *loopInfo, VirtRegMap *vrm) { @@ -538,5 +520,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, case trivial: return new TrivialSpiller(mf, lis, vrm); case standard: return new StandardSpiller(lis, loopInfo, vrm); case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); + case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm); } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index dda52e8..450447b 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -33,11 +33,19 @@ namespace llvm { public: virtual ~Spiller() = 0; - /// Spill the given live range. The method used will depend on the Spiller - /// implementation selected. - virtual std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestIndex = 0) = 0; + /// spill - Spill the given live interval. The method used will depend on + /// the Spiller implementation selected. + /// + /// @param li The live interval to be spilled. + /// @param spillIs A list of intervals that are about to be spilled, + /// and so cannot be used for remat etc. + /// @param newIntervals The newly created intervals will be appended here. + /// @param earliestIndex The earliest point for splitting. (OK, it's another + /// pointer to the allocator guts). + virtual void spill(LiveInterval *li, + std::vector<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex *earliestIndex = 0) = 0; }; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 8a6a727..ca5c28c 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -136,7 +136,7 @@ bool StackProtector::RequiresStackProtector() const { bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = 0; // The basic block to jump to if check fails. AllocaInst *AI = 0; // Place on stack that stores the stack guard. - Constant *StackGuardVar = 0; // The stack guard variable. + Value *StackGuardVar = 0; // The stack guard variable. 
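// (The guard value can now come from two places, per the hunk below: a fixed
// offset in some address space reported by TLI->getStackCookieLocation() --
// e.g. a TLS-segment slot on x86 -- materialized as an inttoptr constant, or
// else the traditional __stack_chk_guard global. A sketch of the constant
// form, assuming AddressSpace and Offset came from that target hook:
//   Constant *Off = ConstantInt::get(Type::getInt32Ty(Ctx), Offset);
//   StackGuardVar = ConstantExpr::getIntToPtr(
//       Off, PointerType::get(Type::getInt8PtrTy(Ctx), AddressSpace));
// )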
for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
BasicBlock *BB = I++;
@@ -153,9 +161,17 @@ bool StackProtector::InsertStackProtectors() {
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
//
- PointerType *PtrTy = PointerType::getUnqual(
- Type::getInt8Ty(RI->getContext()));
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ unsigned AddressSpace, Offset;
+ if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+ Constant *OffsetVal =
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+ PointerType::get(PtrTy, AddressSpace));
+ } else {
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ }
BasicBlock &Entry = F->getEntryBlock();
Instruction *InsPt = &Entry.front();
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 7f3b452..eff3c33 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -19,6 +19,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -508,8 +509,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
// Abort if the use is actually a sub-register def. We don't have enough
// information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isExtractSubreg() ||
- MII->isInsertSubreg() || MII->isSubregToReg())
+ if (MO.getSubReg() || MII->isSubregToReg())
return false;
const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
@@ -571,7 +571,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
// Abort if the use is actually a sub-register use. We don't have enough
// information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isExtractSubreg()) + if (MO.getSubReg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); @@ -610,8 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumLoadElim; } else { - TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC, - MI->getDebugLoc()); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), + DstReg).addReg(Reg); ++NumRegRepl; } @@ -627,8 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumStoreElim; } else { - TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC, - MI->getDebugLoc()); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(SrcReg); ++NumRegRepl; } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 142398c..59315cf 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterCoalescer.h" @@ -695,9 +696,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert copy from curr.second to a temporary at // the Phi defining curr.second MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); - TII->copyRegToReg(*PI->getParent(), PI, t, - curr.second, RC, RC, DebugLoc()); - + BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY), + t).addReg(curr.second); DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t << "\n"); @@ -712,8 +712,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, } // Insert copy from map[curr.first] to curr.second - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, - map[curr.first], RC, RC, DebugLoc()); + BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]); map[curr.first] = curr.second; DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " << curr.second << "\n"); @@ -761,8 +761,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert a copy from dest to a new temporary t at the end of b unsigned t = MF->getRegInfo().createVirtualRegister(RC); - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t, - curr.second, RC, RC, DebugLoc()); + BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), t).addReg(curr.second); map[curr.second] = t; MachineBasicBlock::iterator TI = MBB->getFirstTerminator(); @@ -830,9 +830,6 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); VNInfo* FirstVN = *Int.vni_begin(); FirstVN->setHasPHIKill(false); - if (I->getOperand(i).isKill()) - FirstVN->addKill(LI.getInstructionIndex(I).getUseIndex()); - LiveRange LR (LI.getMBBStartIdx(I->getParent()), LI.getInstructionIndex(I).getUseIndex().getNextSlot(), FirstVN); @@ -959,9 +956,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { } else { // Insert a last-minute copy if a conflict was detected. 
const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first); - TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(), - I->first, SI->first, RC, RC, DebugLoc()); + BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), I->first).addReg(SI->first); LI.renumber(); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index f2e2a76..075db80 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/Target/TargetInstrInfo.h" @@ -559,11 +560,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); - TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC,RC, DebugLoc()); - MachineInstr *CopyMI = prior(Loc); - Copies.push_back(CopyMI); + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch @@ -618,11 +617,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); - TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC, RC, DebugLoc()); - MachineInstr *CopyMI = prior(Loc); - Copies.push_back(CopyMI); + Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first) + .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 0ad6619..cdacb98 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallVector.h" @@ -21,11 +22,34 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PostRAHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void +TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + + // Remove all the old successors of MBB from the CFG. 
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If NewDest doesn't immediately follow MBB, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
// commuteInstruction - The default implementation of this method just exchanges
// the two operands returned by findCommutedOpIndices.
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
@@ -136,17 +160,9 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg,
unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MachineOperand &MO = MI->getOperand(0);
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
- MO.setReg(DestReg);
- MO.setSubReg(SubIdx);
- } else if (SubIdx) {
- MO.setReg(TRI->getSubReg(DestReg, SubIdx));
- } else {
- MO.setReg(DestReg);
- }
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
}
@@ -175,6 +191,47 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
return FnSize;
}
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+ assert(FoldIdx<2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg);
+ if (RC == LiveRC || RC->hasSubClass(LiveRC))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
+
+bool TargetInstrInfoImpl::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
/// slot into the specified machine instruction for the specified operand(s).
/// If this is possible, a new instruction is returned with the specified
/// operand folded, otherwise NULL is returned. The client is responsible for
/// removing the old instruction and adding the new one in the instruction
/// stream.
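/// For a plain COPY the fold can degenerate into a direct stack operation
/// (see the fallback at the end of this function); an illustrative sketch:
///   %vreg1 = COPY %vreg2   with Ops = {0}  ==>  store %vreg2 to the slot
///   %vreg1 = COPY %vreg2   with Ops = {1}  ==>  load %vreg1 from the slot
/// i.e. folding the def side yields a spill, folding the use side a reload.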
MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, - MachineInstr* MI, +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { + int FI) const { unsigned Flags = 0; for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (MI->getOperand(Ops[i]).isDef()) @@ -193,34 +249,56 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, else Flags |= MachineMemOperand::MOLoad; + MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "foldMemoryOperand needs an inserted instruction"); + MachineFunction &MF = *MBB->getParent(); + // Ask the target to do the actual folding. - MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); - if (!NewMI) return 0; + if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + // Add a memory operand, foldMemoryOperandImpl doesn't do that. + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->getDesc().mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->getDesc().mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + Flags, /*Offset=*/0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + NewMI->addMemOperand(MF, MMO); - assert((!(Flags & MachineMemOperand::MOStore) || - NewMI->getDesc().mayStore()) && - "Folded a def to a non-store!"); - assert((!(Flags & MachineMemOperand::MOLoad) || - NewMI->getDesc().mayLoad()) && - "Folded a use to a non-load!"); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); - assert(MFI.getObjectOffset(FrameIndex) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex), - Flags, /*Offset=*/0, - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); - NewMI->addMemOperand(MF, MMO); + // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. + return MBB->insert(MI, NewMI); + } - return NewMI; + // Straight COPY may fold as load/store. + if (!MI->isCopy() || Ops.size() != 1) + return 0; + + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); + if (!RC) + return 0; + + const MachineOperand &MO = MI->getOperand(1-Ops[0]); + MachineBasicBlock::iterator Pos = MI; + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + if (Flags == MachineMemOperand::MOStore) + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + else + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + return --Pos; } /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, - MachineInstr* MI, +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const { assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); @@ -228,11 +306,15 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, for (unsigned i = 0, e = Ops.size(); i != e; ++i) assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); #endif + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); // Ask the target to do the actual folding. 
MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); if (!NewMI) return 0; + NewMI = MBB.insert(MI, NewMI); + // Copy the memoperands from the load to the folded instruction. NewMI->setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end()); @@ -240,11 +322,9 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, return NewMI; } -bool -TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * - MI, - AliasAnalysis * - AA) const { +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetMachine &TM = MF.getTarget(); @@ -324,3 +404,31 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * // Everything checked out. return true; } + +/// isSchedulingBoundary - Test if the given instruction should be +/// considered a scheduling boundary. This primarily includes labels +/// and terminators. +bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const{ + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); + if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) + return true; + + return false; +} + +// Default implementation of CreateTargetPostRAHazardRecognizer. 
+ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { + return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II); +} diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 71ad3fb..a80cfc4 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -825,32 +825,32 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection = getContext().getCOFFSection(".text", - MCSectionCOFF::IMAGE_SCN_CNT_CODE | - MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_CODE | + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getText()); DataSection = getContext().getCOFFSection(".data", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); ReadOnlySection = getContext().getCOFFSection(".rdata", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); StaticCtorSection = getContext().getCOFFSection(".ctors", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); StaticDtorSection = getContext().getCOFFSection(".dtors", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); // FIXME: We're emitting LSDA info into a readonly section on COFF, even @@ -859,76 +859,76 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, // adjusted or this should be a data section. LSDASection = getContext().getCOFFSection(".gcc_except_table", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); EHFrameSection = getContext().getCOFFSection(".eh_frame", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); // Debug info. 
DwarfAbbrevSection = getContext().getCOFFSection(".debug_abbrev", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfInfoSection = getContext().getCOFFSection(".debug_info", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLineSection = getContext().getCOFFSection(".debug_line", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfFrameSection = getContext().getCOFFSection(".debug_frame", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfPubNamesSection = getContext().getCOFFSection(".debug_pubnames", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfPubTypesSection = getContext().getCOFFSection(".debug_pubtypes", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfStrSection = getContext().getCOFFSection(".debug_str", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLocSection = getContext().getCOFFSection(".debug_loc", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfARangesSection = getContext().getCOFFSection(".debug_aranges", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfRangesSection = getContext().getCOFFSection(".debug_ranges", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfMacroInfoSection = getContext().getCOFFSection(".debug_macinfo", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DrectveSection = getContext().getCOFFSection(".drectve", - MCSectionCOFF::IMAGE_SCN_LNK_INFO, + COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()); } @@ -936,27 +936,27 @@ static unsigned getCOFFSectionFlags(SectionKind K) { unsigned Flags = 0; - if (!K.isMetadata()) + if (K.isMetadata()) Flags |= - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE; + COFF::IMAGE_SCN_MEM_DISCARDABLE; else if (K.isText()) Flags |= - MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | - MCSectionCOFF::IMAGE_SCN_CNT_CODE; + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_CNT_CODE; else if (K.isBSS ()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; else if (K.isReadOnly()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - 
MCSectionCOFF::IMAGE_SCN_MEM_READ; + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ; else if (K.isWriteable()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; return Flags; } @@ -995,10 +995,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, unsigned Characteristics = getCOFFSectionFlags(Kind); - Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT; + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); + COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); } if (Kind.isText()) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3d10dc1..5649143 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -381,7 +382,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, DstReg = 0; unsigned SrcSubIdx, DstSubIdx; if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.isExtractSubreg()) { + if (MI.isCopy()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); } else if (MI.isInsertSubreg()) { @@ -897,6 +898,108 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. + if (TID.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); + if (UnfoldTID.getNumDefs() == 1) { + MachineFunction &MF = *mbbi->getParent(); + + // Unfold the load. + DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + const TargetRegisterClass *RC = + UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector<MachineInstr *, 2> NewMIs; + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + mbbi->insert(mi, NewMIs[0]); + mbbi->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. 
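// A condensed sketch, not part of the patch, of the commit-or-roll-back
// idiom the unfolding code above and below follows. tryTransform() is a
// hypothetical stand-in for the recursive TryInstructionTransform call
// that appears in the next hunk.
static bool unfoldAndRetry(const TargetInstrInfo *TII, MachineFunction &MF,
                           MachineBasicBlock *MBB, MachineInstr *MI,
                           unsigned Reg) {
  SmallVector<MachineInstr*, 2> NewMIs;
  if (!TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad=*/true,
                                /*UnfoldStore=*/false, NewMIs))
    return false;                  // Target declined; nothing has changed.
  MBB->insert(MI, NewMIs[0]);      // The hoisted load.
  MBB->insert(MI, NewMIs[1]);      // The operation, now register-only.
  if (tryTransform(NewMIs[1])) {   // Hypothetical recursive attempt.
    MI->eraseFromParent();         // Commit: keep the unfolded pair.
    return true;
  }
  NewMIs[0]->eraseFromParent();    // Roll back: keep the original.
  NewMIs[1]->eraseFromParent();
  return false;
}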
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformSuccess = + TryInstructionTransform(NewMI, mi, mbbi, + NewSrcIdx, NewDstIdx, Dist); + if (TransformSuccess || + NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. + if (LV) { + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() != 0 && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + mi->eraseFromParent(); + mi = NewMIs[1]; + if (TransformSuccess) + return true; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + return false; } @@ -1047,14 +1150,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); ReMatRegs.set(regB); ++NumReMats; } else { - bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc, - mi->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Unable to issue a copy instruction!\n"); + BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), + regA).addReg(regB); } MachineBasicBlock::iterator prevMI = prior(mi); @@ -1104,12 +1205,30 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } } - + + // Schedule the source copy / remat inserted to form two-address + // instruction. FIXME: Does it matter the distance map may not be + // accurate after it's scheduled? + TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); + MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } + // Clear TiedOperands here instead of at the top of the loop // since most instructions do not have tied operands. 
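// The INSERT_SUBREG lowering just above is pure operand surgery; a sketch,
// not part of the patch, of the same rewrite in isolation:
//   Before: %reg = INSERT_SUBREG %reg, %subreg, subidx   (four operands)
//   After:  %reg:subidx = COPY %subreg                   (two operands)
static void lowerInsertSubreg(MachineInstr *MI, const TargetInstrInfo *TII) {
  unsigned SubIdx = MI->getOperand(3).getImm();
  MI->RemoveOperand(3);                        // Drop the index operand.
  MI->getOperand(0).setSubReg(SubIdx);         // Fold it into the def.
  MI->RemoveOperand(1);                        // Drop the tied full-reg use.
  MI->setDesc(TII->get(TargetOpcode::COPY));   // Retag as a plain COPY.
}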
TiedOperands.clear(); @@ -1136,14 +1255,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { static void UpdateRegSequenceSrcs(unsigned SrcReg, unsigned DstReg, unsigned SubIdx, - MachineRegisterInfo *MRI) { + MachineRegisterInfo *MRI, + const TargetRegisterInfo &TRI) { for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), RE = MRI->reg_end(); RI != RE; ) { MachineOperand &MO = RI.getOperand(); ++RI; - MO.setReg(DstReg); - assert(MO.getSubReg() == 0); - MO.setSubReg(SubIdx); + MO.substVirtReg(DstReg, SubIdx, TRI); } } @@ -1165,55 +1283,102 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, if (!Seen.insert(SrcReg)) continue; - // If there are no other uses than extract_subreg which feed into + // Check that the instructions are all in the same basic block. + MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); + if (SrcDefMI->getParent() != DstDefMI->getParent()) + continue; + + // If there are no other uses than copies which feed into // the reg_sequence, then we might be able to coalesce them. bool CanCoalesce = true; - SmallVector<unsigned, 4> SubIndices; + SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - if (!UseMI->isExtractSubreg() || - UseMI->getOperand(0).getReg() != DstReg) { + if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { CanCoalesce = false; break; } - SubIndices.push_back(UseMI->getOperand(2).getImm()); + SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); + DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); } - if (!CanCoalesce || SubIndices.size() < 2) + if (!CanCoalesce || SrcSubIndices.size() < 2) continue; - std::sort(SubIndices.begin(), SubIndices.end()); - unsigned NewSubIdx = 0; - if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, - NewSubIdx)) { - bool Proceed = true; - if (NewSubIdx) - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - // FIXME: If the sub-registers do not combine to the whole - // super-register, i.e. NewSubIdx != 0, and any of the use has a - // sub-register index, then abort the coalescing attempt. - if (MO.getSubReg()) { - Proceed = false; - break; - } - MO.setReg(DstReg); - MO.setSubReg(NewSubIdx); - } - if (Proceed) - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - MO.setReg(DstReg); - if (NewSubIdx) - MO.setSubReg(NewSubIdx); - } + // Check that the source subregisters can be combined. + std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); + unsigned NewSrcSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, + NewSrcSubIdx)) + continue; + + // Check that the destination subregisters can also be combined. + std::sort(DstSubIndices.begin(), DstSubIndices.end()); + unsigned NewDstSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, + NewDstSubIdx)) + continue; + + // If neither source nor destination can be combined to the full register, + // just give up. This could be improved if it ever matters. 
+ if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) + continue; + + // Now that we know that all the uses are copies and that those + // subregs can somehow be combined, scan all the copies again to + // make sure the subregs are in the right order and can be composed. + MachineInstr *SomeMI = 0; + CanCoalesce = true; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + assert(UseMI->isCopy()); + unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); + unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); + assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); + if ((NewDstSubIdx == 0 && + TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || + (NewSrcSubIdx == 0 && + TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { + CanCoalesce = false; + break; + } + // Keep track of one of the uses. + SomeMI = UseMI; + } + if (!CanCoalesce) + continue; + + // Insert a copy to replace the original. + MachineBasicBlock::iterator InsertLoc = SomeMI; + MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, + SomeMI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(SrcReg, 0, NewSrcSubIdx); + + // Remove all the old copy instructions. + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ) { + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI == CopyMI) + continue; + assert(UseMI->isCopy()); + // Move any kills to the new copy instruction. + if (UseMI->getOperand(1).isKill()) { + CopyMI->getOperand(1).setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); } + UseMI->eraseFromParent(); + } } } @@ -1268,15 +1433,13 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { } IsImpDef = false; - // Remember EXTRACT_SUBREG sources. These might be candidate for - // coalescing. - if (DefMI->isExtractSubreg()) + // Remember COPY sources. These might be candidates for coalescing. + if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) RealSrcs.push_back(DefMI->getOperand(1).getReg()); - if (!Seen.insert(SrcReg) || - MI->getParent() != DefMI->getParent() || - !MI->getOperand(i).isKill() || - HasOtherRegSequenceUses(SrcReg, MI, MRI)) { + bool isKill = MI->getOperand(i).isKill(); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) { // REG_SEQUENCE cannot have duplicated operands, add a copy. // Also add a copy if the source is live-in to the block. We don't want // to end up with a partial-redef of a livein, e.g. @@ -1292,30 +1455,23 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { // If the REG_SEQUENCE doesn't kill its source, keeping live variables // correctly up to date becomes very difficult. Insert a copy.
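// The composition test in the loop above is the heart of the new check:
// each copy's subregister index must be recoverable by composing the
// combined index from the other side. In isolation (a sketch, not part of
// the patch, with the semantics the hunk relies on):
static bool copyComposes(const TargetRegisterInfo *TRI,
                         unsigned NewSrcSub, unsigned NewDstSub,
                         unsigned SrcSub, unsigned DstSub) {
  if (NewDstSub == 0)   // Coalescing onto the full destination register.
    return TRI->composeSubRegIndices(NewSrcSub, DstSub) == SrcSub;
  // Otherwise NewSrcSub == 0: coalescing onto the full source register.
  return TRI->composeSubRegIndices(NewDstSub, SrcSub) == DstSub;
}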
// - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - unsigned NewReg = MRI->createVirtualRegister(RC); MachineBasicBlock::iterator InsertLoc = MI; - bool Emitted = - TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Unable to issue a copy instruction!\n"); - MI->getOperand(i).setReg(NewReg); - if (MI->getOperand(i).isKill()) { - MachineBasicBlock::iterator CopyMI = prior(InsertLoc); - MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg); - KillMO->setIsKill(); - if (LV) - // Update live variables - LV->replaceKillInstruction(SrcReg, MI, &*CopyMI); - } + MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, + MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm()) + .addReg(SrcReg, getKillRegState(isKill)); + MI->getOperand(i).setReg(0); + if (LV && isKill) + LV->replaceKillInstruction(SrcReg, MI, CopyMI); + DEBUG(dbgs() << "Inserted: " << *CopyMI); } } for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + if (!SrcReg) continue; unsigned SubIdx = MI->getOperand(i+1).getImm(); - UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI); + UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); } if (IsImpDef) { @@ -1328,8 +1484,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MI->eraseFromParent(); } - // Try coalescing some EXTRACT_SUBREG instructions. - CoalesceExtSubRegs(RealSrcs, DstReg); + // Try coalescing some EXTRACT_SUBREG instructions. This can create + // INSERT_SUBREG instructions that must have <undef> flags added by + // LiveIntervalAnalysis, so only run it when LiveVariables is available. + if (LV) + CoalesceExtSubRegs(RealSrcs, DstReg); } RegSequences.clear(); diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 871d836..57a1500 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -667,8 +667,7 @@ static void ReMaterialize(MachineBasicBlock &MBB, assert(TID.getNumDefs() == 1 && "Don't know how to remat instructions that define > 1 values!"); #endif - TII->reMaterialize(MBB, MII, DestReg, - ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI); + TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -769,7 +768,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); I != E; ++I) { unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg); + const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); // FIXME: A temporary workaround. We can't reuse available value if it's // not safe to move the def of the virtual register's class. e.g. // X86::RFP* register classes. Do not add it as a live-in. 
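// The register-class queries in this file change in two ways, sketched
// here (not part of the patch): getMinimalPhysRegClass(Reg) returns the
// smallest register class containing a physical register, and where only
// membership mattered the class lookup disappears entirely, as in the
// FindFreeRegister hunk that follows.
static bool isUsableKill(const TargetRegisterClass *RC, unsigned Kill) {
  // Replaces TRI->getPhysicalRegisterRegClass(Kill) == RC. Deliberately
  // weaker: any register belonging to RC qualifies, not only registers
  // whose looked-up class was exactly RC.
  return RC->contains(Kill);
}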
@@ -1022,7 +1021,7 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, for (unsigned i = 0, e = Kills.size(); i != e; ++i) { unsigned Kill = Kills[i]; if (!Defs[Kill] && !Uses[Kill] && - TRI->getPhysicalRegisterRegClass(Kill) == RC) + RC->contains(Kill)) return Kill; } for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { @@ -1410,25 +1409,25 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { assert(NewMIs.size() == 1); MachineInstr *NewMI = NewMIs.back(); + MBB->insert(MII, NewMI); NewMIs.clear(); int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); assert(Idx != -1); SmallVector<unsigned, 1> Ops; Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS); + MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); + NewMI->eraseFromParent(); if (FoldedMI) { VRM->addSpillSlotUse(SS, FoldedMI); if (!VRM->hasPhys(UnfoldVR)) VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = MBB->insert(MII, FoldedMI); + MII = FoldedMI; InvalidateKills(MI, TRI, RegKills, KillOps); VRM->RemoveMachineInstrFromMaps(&MI); MBB->erase(&MI); - MF.DeleteMachineInstr(NewMI); return true; } - MF.DeleteMachineInstr(NewMI); } } @@ -1480,7 +1479,6 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) return false; - MachineFunction &MF = *MBB->getParent(); MachineInstr &MI = *MII; MachineBasicBlock::iterator DefMII = prior(MII); MachineInstr *DefMI = DefMII; @@ -1511,11 +1509,12 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); if (!CommutedMI) return false; + MBB->insert(MII, CommutedMI); SmallVector<unsigned, 1> Ops; Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS); + MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); // Not needed since foldMemoryOperand returns new MI. - MF.DeleteMachineInstr(CommutedMI); + CommutedMI->eraseFromParent(); if (!FoldedMI) return false; @@ -1528,7 +1527,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr *StoreMI = MII; VRM->addSpillSlotUse(SS, StoreMI); VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = MBB->insert(MII, FoldedMI); // Update MII to backtrack. + MII = FoldedMI; // Update MII to backtrack. // Delete all 3 old instructions. InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); @@ -1704,7 +1703,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI); for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(PhysReg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); assert(RC && "Unable to determine register class!"); int SS = VRM->getEmergencySpillSlot(RC); if (UsedSS.count(SS)) @@ -1759,7 +1758,6 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, bool DoReMat = VRM->isReMaterialized(VirtReg); int SSorRMId = DoReMat ? 
VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); if (InReg == Phys) { // If the value is already available in the expected register, save @@ -1793,20 +1791,16 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, MachineBasicBlock::iterator InsertLoc = ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, *MBB->getParent()); - - TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC, - MI->getDebugLoc()); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), Phys) + .addReg(InReg, RegState::Kill); // This invalidates Phys. Spills.ClobberPhysReg(Phys); // Remember it's available. Spills.addAvailable(SSorRMId, Phys); - // Mark is killed. - MachineInstr *CopyMI = prior(InsertLoc); CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); - KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); DEBUG(dbgs() << '\t' << *CopyMI); @@ -2013,7 +2007,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // = EXTRACT_SUBREG fi#1 // fi#1 is available in EDI, but it cannot be reused because it's not in // the right register file. - if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) { + if (PhysReg && !AvoidReload && SubIdx) { const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); if (!RC->contains(PhysReg)) PhysReg = 0; @@ -2034,6 +2028,18 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, CanReuse = !ReusedOperands.isClobbered(PhysReg) && Spills.canClobberPhysReg(PhysReg); } + // If this is an asm, and PhysReg is used elsewhere as an earlyclobber + // operand, we can't also use it as an input. (Outputs always come + // before inputs, so we can stop looking at i.) + if (MI.isInlineAsm()) { + for (unsigned k=0; k<i; ++k) { + MachineOperand &MOk = MI.getOperand(k); + if (MOk.isReg() && MOk.getReg()==PhysReg && MOk.isEarlyClobber()) { + CanReuse = false; + break; + } + } + } if (CanReuse) { // If this stack slot value is already available, reuse it! @@ -2104,6 +2110,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // To avoid this problem, and to avoid doing a load right after a store, // we emit a copy from PhysReg into the designated register for this // operand. + // + // This case also applies to an earlyclobber'd PhysReg. 
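// Why an earlyclobber operand blocks reuse, illustrated at the C++ level
// (a hedged example, not part of the patch; x86-64 assumed):
static long earlyClobberDemo(long in) {
  long out;
  // "=&r" marks 'out' as earlyclobber: the asm writes it before the last
  // read of 'in', so 'in' (or a reload of it) must not share its register.
  asm("movq %1, %0\n\taddq %1, %0" : "=&r"(out) : "r"(in));
  return out;   // out == 2 * in
}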
unsigned DesignatedReg = VRM->getPhys(VirtReg); assert(DesignatedReg && "Must map virtreg to physreg!"); @@ -2136,7 +2144,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, continue; } - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); MRI->setPhysRegUsed(DesignatedReg); ReusedOperands.markClobbered(DesignatedReg); @@ -2144,11 +2151,9 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, MachineBasicBlock::iterator InsertLoc = ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, SSorRMId, TII, MF); - - TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC, - MI.getDebugLoc()); - - MachineInstr *CopyMI = prior(InsertLoc); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), + DesignatedReg).addReg(PhysReg); CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); UpdateKills(*CopyMI, TRI, RegKills, KillOps); @@ -2269,27 +2274,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { DEBUG(dbgs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC, - MI.getDebugLoc()); MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - unsigned SubIdx = DefMO->getSubReg(); + MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DestReg, RegState::Define, DefMO->getSubReg()) + .addReg(InReg, RegState::Kill); // Revisit the copy so we make sure to notice the effects of the // operation on the destreg (either needing to RA it if it's // virtual or needing to clobber any values if it's physical). - NextMII = &MI; - --NextMII; // backtrack to the copy. + NextMII = CopyMI; NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - // Propagate the sub-register index over. - if (SubIdx) { - DefMO = NextMII->findRegisterDefOperand(DestReg); - DefMO->setSubReg(SubIdx); - } - - // Mark is killed. - MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg); - KillOpnd->setIsKill(); - BackTracked = true; } else { DEBUG(dbgs() << "Removing now-noop copy: " << MI); @@ -2430,6 +2424,24 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Also check if it's copying from an "undef", if so, we can't // eliminate this or else the undef marker is lost and it will // confuses the scavenger. This is extremely rare. + if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && + MI.getNumOperands() == 2) { + ++NumDCE; + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + SmallVector<unsigned, 2> KillRegs; + InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); + if (MO.isDead() && !KillRegs.empty()) { + // Source register or an implicit super/sub-register use is killed. + assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); + // Last def is now dead. + TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); + } + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + Spills.disallowClobberPhysReg(VirtReg); + goto ProcessNextInst; + } unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && SrcSR == DstSR && @@ -2519,6 +2531,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Check to see if this is a noop copy. If so, eliminate the // instruction before considering the dest reg to be changed. 
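// isIdentityCopy() gates the two new dead-copy eliminations (in the hunk
// above and in the store path that follows). A sketch of the predicate as
// these uses assume it; the real query lives on MachineInstr:
static bool isNoopCopy(const MachineInstr &MI) {
  return MI.isCopy() &&
         MI.getOperand(0).getReg()    == MI.getOperand(1).getReg() &&
         MI.getOperand(0).getSubReg() == MI.getOperand(1).getSubReg();
}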
+ if (MI.isIdentityCopy()) { + ++NumDCE; + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + UpdateKills(*LastStore, TRI, RegKills, KillOps); + goto ProcessNextInst; + } { unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp index 5e558ca..c8488b2 100644 --- a/lib/CompilerDriver/Tool.cpp +++ b/lib/CompilerDriver/Tool.cpp @@ -85,7 +85,8 @@ StrVector Tool::SortArgs(ArgsVector& Args) const { StrVector Out; // HACK: this won't be needed when we'll migrate away from CommandLine. - std::stable_sort(Args.begin(), Args.end(), &CompareFirst<unsigned, std::string>); + std::stable_sort(Args.begin(), Args.end(), + &CompareFirst<unsigned, std::string>); for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) { Out.push_back(B->second); } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 0748b54..59ebe6e 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ECStack.pop_back(); if (ECStack.empty()) { // Finished main. Put result into exit code... - if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type? + if (RetTy && !RetTy->isVoidTy()) { // Nonvoid return type? ExitValue = Result; // Capture the exit value of the program } else { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 26a53b5..57d1260 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -266,7 +266,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, RawFn = (RawFunc)(intptr_t) sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName()); if (!RawFn) - RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F); + RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F); if (RawFn != 0) RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later } else { diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 546d2b2..67bd3ed 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -626,10 +626,7 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); - // JIT the function - isAlreadyCodeGenerating = true; - jitstate->getPM(locked).run(*F); - isAlreadyCodeGenerating = false; + jitTheFunction(F, locked); // If the function referred to another function that had not yet been // read from bitcode, and we are jitting non-lazily, emit it now. @@ -640,10 +637,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { assert(!PF->hasAvailableExternallyLinkage() && "Externally-defined function should not be in pending list."); - // JIT the function - isAlreadyCodeGenerating = true; - jitstate->getPM(locked).run(*PF); - isAlreadyCodeGenerating = false; + jitTheFunction(PF, locked); // Now that the function has been jitted, ask the JITEmitter to rewrite // the stub with real address of the function. 
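// Lifecycle of the new basic-block address support, pieced together from
// the hunks below (a summary sketch, not part of the patch): the emitter
// records an address for every address-taken block as it is emitted, the
// out-of-memory retry path clears stale entries, and lookups force
// compilation of the parent function first. resolveBlockAddress() is a
// hypothetical wrapper:
static void *resolveBlockAddress(JIT &TheJIT, BasicBlock *BB) {
  // Compiles BB's parent if needed, then consults the map; asserts if the
  // block was optimized away.
  return TheJIT.getPointerToBasicBlock(BB);
}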
@@ -651,6 +645,15 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { } } +void JIT::jitTheFunction(Function *F, const MutexGuard &locked) { + isAlreadyCodeGenerating = true; + jitstate->getPM(locked).run(*F); + isAlreadyCodeGenerating = false; + + // clear basic block addresses after this function is done + getBasicBlockAddressMap(locked).clear(); +} + /// getPointerToFunction - This method is used to get the address of the /// specified function, compiling it if necessary. /// @@ -687,6 +690,41 @@ void *JIT::getPointerToFunction(Function *F) { return Addr; } +void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap(locked).find(BB); + if (I == getBasicBlockAddressMap(locked).end()) { + getBasicBlockAddressMap(locked)[BB] = Addr; + } else { + // ignore repeats: some BBs can be split into a few MBBs? + } +} + +void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { + MutexGuard locked(lock); + getBasicBlockAddressMap(locked).erase(BB); +} + +void *JIT::getPointerToBasicBlock(BasicBlock *BB) { + // make sure its function is compiled by the JIT + (void)getPointerToFunction(BB->getParent()); + + // resolve basic block address + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap(locked).find(BB); + if (I != getBasicBlockAddressMap(locked).end()) { + return I->second; + } else { + assert(0 && "JIT does not have BB address for address-of-label, was" + " it eliminated by optimizer?"); + return 0; + } +} + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index edae719..1d1763e 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -51,6 +51,10 @@ public: class JIT : public ExecutionEngine { + /// types + typedef ValueMap<const BasicBlock *, void *> + BasicBlockAddressMapTy; + /// data TargetMachine &TM; // The current target we are compiling to TargetJITInfo &TJI; // The JITInfo for the target we are compiling to JITCodeEmitter *JCE; // JCE object @@ -67,6 +71,12 @@ class JIT : public ExecutionEngine { JITState *jitstate; + /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their + /// actualized versions, only filled for basic blocks that have their address + /// taken. + BasicBlockAddressMapTy BasicBlockAddressMap; + + JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool AllocateGVsWithCode); @@ -90,9 +100,9 @@ public: CodeGenOpt::Level OptLevel = CodeGenOpt::Default, bool GVsWithCode = true, - CodeModel::Model CMM = CodeModel::Default) { + CodeModel::Model CMM = CodeModel::Default) { return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode, - CMM); + CMM); } virtual void addModule(Module *M); @@ -127,10 +137,15 @@ public: /// void *getPointerToFunction(Function *F); - void *getPointerToBasicBlock(BasicBlock *BB) { - assert(0 && "JIT does not support address-of-label yet!"); - return 0; - } + /// addPointerToBasicBlock - Adds the address of the specified basic block. + void addPointerToBasicBlock(const BasicBlock *BB, void *Addr); + + /// clearPointerToBasicBlock - Removes the address of the specified basic block.
+ void clearPointerToBasicBlock(const BasicBlock *BB); + + /// getPointerToBasicBlock - This returns the address of the specified basic + /// block, assuming function is compiled. + void *getPointerToBasicBlock(BasicBlock *BB); /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the @@ -197,11 +212,18 @@ public: const JITEvent_EmittedFunctionDetails &Details); void NotifyFreeingMachineCode(void *OldPtr); + BasicBlockAddressMapTy & + getBasicBlockAddressMap(const MutexGuard &) { + return BasicBlockAddressMap; + } + + private: static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, TargetMachine &tm); void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked); void updateFunctionStub(Function *F); + void jitTheFunction(Function *F, const MutexGuard &locked); protected: diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index e3855b2..28d79da 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -435,6 +435,9 @@ namespace { if (MBBLocations.size() <= (unsigned)MBB->getNumber()) MBBLocations.resize((MBB->getNumber()+1)*2); MBBLocations[MBB->getNumber()] = getCurrentPCValue(); + if (MBB->hasAddressTaken()) + TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(), + (void*)getCurrentPCValue()); DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" << (void*) getCurrentPCValue() << "]\n"); } @@ -442,7 +445,7 @@ namespace { virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const; virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const; - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const{ assert(MBBLocations.size() > (unsigned)MBB->getNumber() && MBBLocations[MBB->getNumber()] && "MBB not emitted!"); return MBBLocations[MBB->getNumber()]; @@ -1310,6 +1313,11 @@ void JITEmitter::retryWithMoreMemory(MachineFunction &F) { deallocateMemForFunction(F.getFunction()); // Try again with at least twice as much free space. 
SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin)); + + for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){ + if (MBB->hasAddressTaken()) + TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock()); + } } /// deallocateMemForFunction - Deallocate all memory for the specified diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp index 2c22550..1be2bec 100644 --- a/lib/Linker/LinkItems.cpp +++ b/lib/Linker/LinkItems.cpp @@ -160,27 +160,26 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { // Check for a file of name "-", which means "read standard input" if (File.str() == "-") { std::auto_ptr<Module> M; - MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(); - if (!Buffer->getBufferSize()) { - delete Buffer; - Error = "standard input is empty"; - } else { - M.reset(ParseBitcodeFile(Buffer, Context, &Error)); - delete Buffer; - if (M.get()) - if (!LinkInModule(M.get(), &Error)) - return false; + if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(&Error)) { + if (!Buffer->getBufferSize()) { + delete Buffer; + Error = "standard input is empty"; + } else { + M.reset(ParseBitcodeFile(Buffer, Context, &Error)); + delete Buffer; + if (M.get()) + if (!LinkInModule(M.get(), &Error)) + return false; + } } return error("Cannot link stdin: " + Error); } - // Make sure we can at least read the file - if (!File.canRead()) + // Determine what variety of file it is. + std::string Magic; + if (!File.getMagicNumber(Magic, 64)) return error("Cannot find linker input '" + File.str() + "'"); - // If its an archive, try to link it in - std::string Magic; - File.getMagicNumber(Magic, 64); switch (sys::IdentifyFileType(Magic.c_str(), 64)) { default: llvm_unreachable("Bad file type identification"); case sys::Unknown_FileType: diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 5e8a3b6..fc4f3c6 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMMC MCLoggingStreamer.cpp MCMachOStreamer.cpp MCNullStreamer.cpp + MCObjectStreamer.cpp MCObjectWriter.cpp MCSection.cpp MCSectionCOFF.cpp @@ -23,5 +24,7 @@ add_llvm_library(LLVMMC MCSymbol.cpp MCValue.cpp MachObjectWriter.cpp + WinCOFFStreamer.cpp + WinCOFFObjectWriter.cpp TargetAsmBackend.cpp ) diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 57b2bcc..e272b60 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -275,19 +275,20 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Global: // .globl/.global OS << MAI.getGlobalDirective(); break; - case MCSA_Hidden: OS << ".hidden "; break; - case MCSA_IndirectSymbol: OS << ".indirect_symbol "; break; - case MCSA_Internal: OS << ".internal "; break; - case MCSA_LazyReference: OS << ".lazy_reference "; break; - case MCSA_Local: OS << ".local "; break; - case MCSA_NoDeadStrip: OS << ".no_dead_strip "; break; - case MCSA_PrivateExtern: OS << ".private_extern "; break; - case MCSA_Protected: OS << ".protected "; break; - case MCSA_Reference: OS << ".reference "; break; - case MCSA_Weak: OS << ".weak "; break; - case MCSA_WeakDefinition: OS << ".weak_definition "; break; + case MCSA_Hidden: OS << "\t.hidden\t"; break; + case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; + case MCSA_Internal: OS << "\t.internal\t"; break; + case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break; + case MCSA_Local: OS << "\t.local\t"; break; + case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break; + case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break; 
+ case MCSA_Protected: OS << "\t.protected\t"; break; + case MCSA_Reference: OS << "\t.reference\t"; break; + case MCSA_Weak: OS << "\t.weak\t"; break; + case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break; // .weak_reference case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break; + case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; } OS << *Symbol; @@ -693,7 +694,6 @@ void MCAsmStreamer::EmitRawText(StringRef String) { } void MCAsmStreamer::Finish() { - OS.flush(); } MCStreamer *llvm::createAsmStreamer(MCContext &Context, diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 5936656..7d84554 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -308,24 +308,23 @@ static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, return !B_Base && BaseSymbol == A_Base; } -bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const { +bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { // Non-temporary labels should always be visible to the linker. - if (!SD->getSymbol().isTemporary()) + if (!Symbol.isTemporary()) return true; // Absolute temporary labels are never visible. - if (!SD->getFragment()) + if (!Symbol.isInSection()) return false; // Otherwise, check if the section requires symbols even for temporary labels. - return getBackend().doesSectionRequireSymbols( - SD->getFragment()->getParent()->getSection()); + return getBackend().doesSectionRequireSymbols(Symbol.getSection()); } const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, const MCSymbolData *SD) const { // Linker visible symbols define atoms. - if (isSymbolLinkerVisible(SD)) + if (isSymbolLinkerVisible(SD->getSymbol())) return SD; // Absolute and undefined symbols have no defining atom. @@ -685,12 +684,8 @@ void MCAssembler::Finish() { for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { // Create dummy fragments to eliminate any empty sections, this simplifies // layout. - if (it->getFragmentList().empty()) { - unsigned ValueSize = 1; - if (getBackend().isVirtualSection(it->getSection())) - ValueSize = 1; + if (it->getFragmentList().empty()) new MCFillFragment(0, 1, 0, it); - } it->setOrdinal(SectionIndex++); } @@ -759,7 +754,6 @@ void MCAssembler::Finish() { // Write the object file. Writer->WriteObject(*this, Layout); - OS.flush(); stats::ObjectBytes += OS.tell() - StartOffset; } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 53ffc94..1137064 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -27,6 +27,10 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) { MachOUniquingMap = 0; ELFUniquingMap = 0; COFFUniquingMap = 0; + + SecureLogFile = getenv("AS_SECURE_LOG_FILE"); + SecureLog = 0; + SecureLogUsed = false; } MCContext::~MCContext() { @@ -37,6 +41,9 @@ MCContext::~MCContext() { delete (MachOUniqueMapTy*)MachOUniquingMap; delete (ELFUniqueMapTy*)ELFUniquingMap; delete (COFFUniqueMapTy*)COFFUniquingMap; + + // If the stream for the .secure_log_unique directive was created free it. 
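// The three fields initialized above support the Darwin-style
// .secure_log_unique directive: SecureLogFile caches the AS_SECURE_LOG_FILE
// environment variable and SecureLog is opened lazily on first use. A
// hypothetical sketch of that lazy open; the accessor name and the
// raw_fd_ostream construction are assumptions, not part of this diff:
raw_ostream *MCContext::getSecureLog() {
  if (!SecureLog && SecureLogFile) {
    std::string Err;
    SecureLog = new raw_fd_ostream(SecureLogFile, Err);   // Assumed ctor.
  }
  return (raw_ostream *)SecureLog;
}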
+ delete (raw_ostream*)SecureLog; } //===----------------------------------------------------------------------===// @@ -90,14 +97,14 @@ MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) { return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + Twine(LocalLabelVal) + "\2" + - Twine(NextInstance(LocalLabelVal))); + Twine(NextInstance(LocalLabelVal))); } MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal, int bORf) { return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + Twine(LocalLabelVal) + "\2" + - Twine(GetInstance(LocalLabelVal) + bORf)); + Twine(GetInstance(LocalLabelVal) + bORf)); } MCSymbol *MCContext::LookupSymbol(StringRef Name) const { diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index c000dd7..343f334 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -40,7 +40,7 @@ void MCExpr::print(raw_ostream &OS) const { const MCSymbol &Sym = SRE.getSymbol(); if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) + SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); // Parenthesize names that start with $ so that they don't look like @@ -51,8 +51,8 @@ void MCExpr::print(raw_ostream &OS) const { OS << Sym; if (SRE.getKind() != MCSymbolRefExpr::VK_None && - SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && - SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) + SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); return; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 27e4e98..44bc267 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCMachOSymbolFlags.h" @@ -25,21 +26,13 @@ using namespace llvm; namespace { -class MCMachOStreamer : public MCStreamer { - -private: - MCAssembler Assembler; - MCSectionData *CurSectionData; - - /// Track the current atom for each section. - DenseMap<const MCSectionData*, MCSymbolData*> CurrentAtomMap; - +class MCMachOStreamer : public MCObjectStreamer { private: MCFragment *getCurrentFragment() const { - assert(CurSectionData && "No current section!"); + assert(getCurrentSectionData() && "No current section!"); - if (!CurSectionData->empty()) - return &CurSectionData->getFragmentList().back(); + if (!getCurrentSectionData()->empty()) + return &getCurrentSectionData()->getFragmentList().back(); return 0; } @@ -49,28 +42,17 @@ private: MCDataFragment *getOrCreateDataFragment() const { MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); if (!F) - F = createDataFragment(); + F = new MCDataFragment(getCurrentSectionData()); return F; } - /// Create a new data fragment in the current section. 
- MCDataFragment *createDataFragment() const { - MCDataFragment *DF = new MCDataFragment(CurSectionData); - DF->setAtom(CurrentAtomMap.lookup(CurSectionData)); - return DF; - } - void EmitInstToFragment(const MCInst &Inst); void EmitInstToData(const MCInst &Inst); public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, - raw_ostream &_OS, MCCodeEmitter *_Emitter) - : MCStreamer(Context), Assembler(Context, TAB, *_Emitter, _OS), - CurSectionData(0) {} - ~MCMachOStreamer() {} - - MCAssembler &getAssembler() { return Assembler; } + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} const MCExpr *AddValueSymbols(const MCExpr *Value) { switch (Value->getKind()) { @@ -86,7 +68,7 @@ public: } case MCExpr::SymbolRef: - Assembler.getOrCreateSymbolData( + getAssembler().getOrCreateSymbolData( cast<MCSymbolRefExpr>(Value)->getSymbol()); break; @@ -101,7 +83,6 @@ public: /// @name MCStreamer Interface /// @{ - virtual void SwitchSection(const MCSection *Section); virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); @@ -152,6 +133,7 @@ public: } virtual void EmitInstruction(const MCInst &Inst); + virtual void Finish(); /// @} @@ -159,38 +141,25 @@ public: } // end anonymous namespace. -void MCMachOStreamer::SwitchSection(const MCSection *Section) { - assert(Section && "Cannot switch to a null section!"); - - // If already in this section, then this is a noop. - if (Section == CurSection) return; - - CurSection = Section; - CurSectionData = &Assembler.getOrCreateSectionData(*Section); -} - void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(CurSection && "Cannot emit before setting section!"); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + Symbol->setSection(*CurSection); - // Update the current atom map, if necessary. - bool MustCreateFragment = false; - if (Assembler.isSymbolLinkerVisible(&SD)) { - CurrentAtomMap[CurSectionData] = &SD; + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - // We have to create a new fragment, fragments cannot span atoms. - MustCreateFragment = true; - } + // We have to create a new fragment if this is an atom defining symbol, + // fragments cannot span atoms. + if (getAssembler().isSymbolLinkerVisible(SD.getSymbol())) + new MCDataFragment(getCurrentSectionData()); // FIXME: This is wasteful, we don't necessarily need to create a data // fragment. Instead, we should mark the symbol as pointing into the data // fragment if it exists, otherwise we should just queue the label and set its // fragment pointer when we emit the next fragment. - MCDataFragment *F = - MustCreateFragment ? createDataFragment() : getOrCreateDataFragment(); + MCDataFragment *F = getOrCreateDataFragment(); assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); SD.setFragment(F); SD.setOffset(F->getContents().size()); @@ -203,14 +172,12 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { // FIXME: Cleanup this code, these bits should be emitted based on semantic // properties, not on the order of definition, etc. 
SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); - - Symbol->setSection(*CurSection); } void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { switch (Flag) { case MCAF_SubsectionsViaSymbols: - Assembler.setSubsectionsViaSymbols(true); + getAssembler().setSubsectionsViaSymbols(true); return; } @@ -219,7 +186,7 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { // FIXME: Lift context changes into super class. - Assembler.getOrCreateSymbolData(*Symbol); + getAssembler().getOrCreateSymbolData(*Symbol); Symbol->setVariableValue(AddValueSymbols(Value)); } @@ -232,15 +199,15 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, // important for matching the string table that 'as' generates. IndirectSymbolData ISD; ISD.Symbol = Symbol; - ISD.SectionData = CurSectionData; - Assembler.getIndirectSymbols().push_back(ISD); + ISD.SectionData = getCurrentSectionData(); + getAssembler().getIndirectSymbols().push_back(ISD); return; } // Adding a symbol attribute always introduces the symbol, note that an // important side effect of calling getOrCreateSymbolData here is to register // the symbol with the assembler. - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); // The implementation of symbol attributes is designed to match 'as', but it // leaves much to desired. It doesn't really make sense to arbitrarily add and @@ -306,6 +273,10 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, // it has to be in a coalesced section, but this isn't enforced. SD.setFlags(SD.getFlags() | SF_WeakDefinition); break; + + case MCSA_WeakDefAutoPrivate: + SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference); + break; } } @@ -313,7 +284,8 @@ void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { // Encode the 'desc' value into the lowest implementation defined bits. assert(DescValue == (DescValue & SF_DescFlagsMask) && "Invalid .desc value!"); - Assembler.getOrCreateSymbolData(*Symbol).setFlags(DescValue&SF_DescFlagsMask); + getAssembler().getOrCreateSymbolData(*Symbol).setFlags( + DescValue & SF_DescFlagsMask); } void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, @@ -321,14 +293,14 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); SD.setExternal(true); SD.setCommon(Size, ByteAlignment); } void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, unsigned Size, unsigned ByteAlignment) { - MCSectionData &SectData = Assembler.getOrCreateSectionData(*Section); + MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); // The symbol may not be present, which only creates the section. if (!Symbol) @@ -338,7 +310,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); // Emit an align fragment if necessary. 
if (ByteAlignment != 1) @@ -346,8 +318,6 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); SD.setFragment(F); - if (Assembler.isSymbolLinkerVisible(&SD)) - F->setAtom(&SD); Symbol->setSection(*Section); @@ -391,13 +361,12 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize, - MaxBytesToEmit, CurSectionData); - F->setAtom(CurrentAtomMap.lookup(CurSectionData)); + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); // Update the maximum alignment on the current section if necessary. - if (ByteAlignment > CurSectionData->getAlignment()) - CurSectionData->setAlignment(ByteAlignment); + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); } void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, @@ -405,24 +374,21 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, - CurSectionData); + getCurrentSectionData()); F->setEmitNops(true); - F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. - if (ByteAlignment > CurSectionData->getAlignment()) - CurSectionData->setAlignment(ByteAlignment); + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); } void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - MCFragment *F = new MCOrgFragment(*Offset, Value, CurSectionData); - F->setAtom(CurrentAtomMap.lookup(CurSectionData)); + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); } void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) { - MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData); - IF->setAtom(CurrentAtomMap.lookup(CurSectionData)); + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); // Add the fixups and data. // @@ -431,7 +397,7 @@ void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) { SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); IF->getCode() = Code; @@ -444,7 +410,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); // Add the fixups and data. @@ -461,21 +427,21 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { if (Inst.getOperand(i).isExpr()) AddValueSymbols(Inst.getOperand(i).getExpr()); - CurSectionData->setHasInstructions(true); + getCurrentSectionData()->setHasInstructions(true); // If this instruction doesn't need relaxation, just emit it as data. - if (!Assembler.getBackend().MayNeedRelaxation(Inst)) { + if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) { EmitInstToData(Inst); return; } // Otherwise, if we are relaxing everything, relax the instruction as much as // possible and emit it as data. 
- if (Assembler.getRelaxAll()) { + if (getAssembler().getRelaxAll()) { MCInst Relaxed; - Assembler.getBackend().RelaxInstruction(Inst, Relaxed); - while (Assembler.getBackend().MayNeedRelaxation(Relaxed)) - Assembler.getBackend().RelaxInstruction(Relaxed, Relaxed); + getAssembler().getBackend().RelaxInstruction(Inst, Relaxed); + while (getAssembler().getBackend().MayNeedRelaxation(Relaxed)) + getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed); EmitInstToData(Relaxed); return; } @@ -485,7 +451,36 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { } void MCMachOStreamer::Finish() { - Assembler.Finish(); + // We have to set the fragment atom associations so we can relax properly for + // Mach-O. + + // First, scan the symbol table to build a lookup table from fragments to + // defining symbols. + DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap; + for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(), + ie = getAssembler().symbol_end(); it != ie; ++it) { + if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) && + it->getFragment()) { + // An atom defining symbol should never be internal to a fragment. + assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!"); + DefiningSymbolMap[it->getFragment()] = it; + } + } + + // Set the fragment atom associations by tracking the last seen atom defining + // symbol. + for (MCAssembler::iterator it = getAssembler().begin(), + ie = getAssembler().end(); it != ie; ++it) { + MCSymbolData *CurrentAtom = 0; + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2)) + CurrentAtom = SD; + it2->setAtom(CurrentAtom); + } + } + + this->MCObjectStreamer::Finish(); } MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp new file mode 100644 index 0000000..d3f7f77 --- /dev/null +++ b/lib/MC/MCObjectStreamer.cpp @@ -0,0 +1,39 @@ +//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectStreamer.h" + +#include "llvm/MC/MCAssembler.h" +using namespace llvm; + +MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &_OS, MCCodeEmitter *_Emitter) + : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, + *_Emitter, _OS)), + CurSectionData(0) +{ +} + +MCObjectStreamer::~MCObjectStreamer() { + delete Assembler; +} + +void MCObjectStreamer::SwitchSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + + // If already in this section, then this is a noop. 
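// SwitchSection() (continued in the next hunk lines) and Finish() are the
// machinery MCObjectStreamer now centralizes for all object-file
// streamers. A sketch, not part of the patch, of a minimal subclass; the
// class name is hypothetical and the format-specific hooks are elided:
class MyObjectStreamer : public MCObjectStreamer {
public:
  MyObjectStreamer(MCContext &Ctx, TargetAsmBackend &TAB, raw_ostream &OS,
                   MCCodeEmitter *Emitter)
      : MCObjectStreamer(Ctx, TAB, OS, Emitter) {}
  // Section switching, assembler ownership, and Finish() are inherited;
  // only the format-specific emission methods remain to implement.
};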
+ if (Section == CurSection) return; + + CurSection = Section; + CurSectionData = &getAssembler().getOrCreateSectionData(*Section); +} + +void MCObjectStreamer::Finish() { + getAssembler().Finish(); +} diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 1cbe09a..465d983 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -23,7 +23,6 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { CurBuf = NULL; CurPtr = NULL; - TokStart = 0; } AsmLexer::~AsmLexer() { @@ -40,10 +39,6 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { TokStart = 0; } -SMLoc AsmLexer::getLoc() const { - return SMLoc::getFromPointer(TokStart); -} - /// ReturnError - Set the error to the specified string at the specified /// location. This is defined to always return AsmToken::Error. AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { @@ -229,7 +224,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() { TokStart = CurPtr; while (!isAtStartOfComment(*CurPtr) && // Start of line comment. - *CurPtr != ';' && // End of statement marker. + *CurPtr != ';' && // End of statement marker. *CurPtr != '\n' && *CurPtr != '\r' && (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 4523eab..793f3c7 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -18,34 +18,85 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetAsmParser.h" using namespace llvm; +namespace { + +/// \brief Generic implementations of directive handling, etc., which are +/// shared (or the default, at least) for all assembler parsers. +class GenericAsmParser : public MCAsmParserExtension { +public: + GenericAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + // Debugging directives.
+ Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveFile)); + Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveLine)); + Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveLoc)); + } + + bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file" + bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line" + bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc" +}; + +} + +namespace llvm { + +extern MCAsmParserExtension *createDarwinAsmParser(); +extern MCAsmParserExtension *createELFAsmParser(); + +} enum { DEFAULT_ADDRSPACE = 0 }; -AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, - const MCAsmInfo &_MAI) - : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0), - CurBuffer(0) { +AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx, + MCStreamer &_Out, const MCAsmInfo &_MAI) + : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), + GenericParser(new GenericAsmParser), PlatformParser(0), + TargetParser(0), CurBuffer(0) { Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); - - // Debugging directives. - AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile); - AddDirectiveHandler(".line", &AsmParser::ParseDirectiveLine); - AddDirectiveHandler(".loc", &AsmParser::ParseDirectiveLoc); -} + // Initialize the generic parser. + GenericParser->Initialize(*this); + // Initialize the platform / file format parser. + // + // FIXME: This is a hack, we need to (majorly) cleanup how these objects are + // created. + if (_MAI.hasSubsectionsViaSymbols()) { + PlatformParser = createDarwinAsmParser(); + PlatformParser->Initialize(*this); + } else { + PlatformParser = createELFAsmParser(); + PlatformParser->Initialize(*this); + } +} AsmParser::~AsmParser() { + delete PlatformParser; + delete GenericParser; +} + +void AsmParser::setTargetParser(TargetAsmParser &P) { + assert(!TargetParser && "Target parser is already initialized!"); + TargetParser = &P; + TargetParser->Initialize(*this); } void AsmParser::Warning(SMLoc L, const Twine &Msg) { @@ -57,11 +108,6 @@ bool AsmParser::Error(SMLoc L, const Twine &Msg) { return true; } -bool AsmParser::TokError(const char *Msg) { - PrintMessage(Lexer.getLoc(), Msg, "error"); - return true; -} - void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const { SrcMgr.PrintMessage(Loc, Msg, Type); @@ -163,11 +209,6 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { return false; } -MCSymbol *AsmParser::CreateSymbol(StringRef Name) { - // FIXME: Inline into callers. - return Ctx.GetOrCreateSymbol(Name); -} - /// ParsePrimaryExpr - Parse a primary expression and return it. /// primaryexpr ::= (parenexpr /// primaryexpr ::= symbol @@ -188,7 +229,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { case AsmToken::Identifier: { // This is a symbol reference. std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@'); - MCSymbol *Sym = CreateSymbol(Split.first); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); // Mark the symbol as used in an expression. Sym->setUsedInExpr(true); @@ -454,8 +495,8 @@ bool AsmParser::ParseStatement() { IDVal = getTok().getString(); Lex(); // Consume the integer token to be used as an identifier token. 
if (Lexer.getKind() != AsmToken::Colon) { - if (!TheCondState.Ignore) - return TokError("unexpected token at start of statement"); + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); } } } @@ -498,7 +539,7 @@ bool AsmParser::ParseStatement() { // implicitly marked as external. MCSymbol *Sym; if (LocalLabelVal == -1) - Sym = CreateSymbol(IDVal); + Sym = getContext().GetOrCreateSymbol(IDVal); else Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal); if (!Sym->isUndefined() || Sym->isVariable()) @@ -530,158 +571,6 @@ bool AsmParser::ParseStatement() { // Otherwise, we have a normal instruction or directive. if (IDVal[0] == '.') { - // FIXME: This should be driven based on a hash lookup and callback. - if (IDVal == ".section") - return ParseDirectiveDarwinSection(); - if (IDVal == ".text") - // FIXME: This changes behavior based on the -static flag to the - // assembler. - return ParseDirectiveSectionSwitch("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); - if (IDVal == ".const") - return ParseDirectiveSectionSwitch("__TEXT", "__const"); - if (IDVal == ".static_const") - return ParseDirectiveSectionSwitch("__TEXT", "__static_const"); - if (IDVal == ".cstring") - return ParseDirectiveSectionSwitch("__TEXT","__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".literal4") - return ParseDirectiveSectionSwitch("__TEXT", "__literal4", - MCSectionMachO::S_4BYTE_LITERALS, - 4); - if (IDVal == ".literal8") - return ParseDirectiveSectionSwitch("__TEXT", "__literal8", - MCSectionMachO::S_8BYTE_LITERALS, - 8); - if (IDVal == ".literal16") - return ParseDirectiveSectionSwitch("__TEXT","__literal16", - MCSectionMachO::S_16BYTE_LITERALS, - 16); - if (IDVal == ".constructor") - return ParseDirectiveSectionSwitch("__TEXT","__constructor"); - if (IDVal == ".destructor") - return ParseDirectiveSectionSwitch("__TEXT","__destructor"); - if (IDVal == ".fvmlib_init0") - return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init0"); - if (IDVal == ".fvmlib_init1") - return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init1"); - - // FIXME: The assembler manual claims that this has the self modify code - // flag, at least on x86-32, but that does not appear to be correct. - if (IDVal == ".symbol_stub") - return ParseDirectiveSectionSwitch("__TEXT","__symbol_stub", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - // FIXME: Different on PPC and ARM. - 0, 16); - // FIXME: PowerPC only? - if (IDVal == ".picsymbol_stub") - return ParseDirectiveSectionSwitch("__TEXT","__picsymbol_stub", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, 26); - if (IDVal == ".data") - return ParseDirectiveSectionSwitch("__DATA", "__data"); - if (IDVal == ".static_data") - return ParseDirectiveSectionSwitch("__DATA", "__static_data"); - - // FIXME: The section names of these two are misspelled in the assembler - // manual. 
- if (IDVal == ".non_lazy_symbol_pointer") - return ParseDirectiveSectionSwitch("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - 4); - if (IDVal == ".lazy_symbol_pointer") - return ParseDirectiveSectionSwitch("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - 4); - - if (IDVal == ".dyld") - return ParseDirectiveSectionSwitch("__DATA", "__dyld"); - if (IDVal == ".mod_init_func") - return ParseDirectiveSectionSwitch("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - 4); - if (IDVal == ".mod_term_func") - return ParseDirectiveSectionSwitch("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - 4); - if (IDVal == ".const_data") - return ParseDirectiveSectionSwitch("__DATA", "__const"); - - - if (IDVal == ".objc_class") - return ParseDirectiveSectionSwitch("__OBJC", "__class", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_meta_class") - return ParseDirectiveSectionSwitch("__OBJC", "__meta_class", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cat_cls_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cat_cls_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cat_inst_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cat_inst_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_protocol") - return ParseDirectiveSectionSwitch("__OBJC", "__protocol", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_string_object") - return ParseDirectiveSectionSwitch("__OBJC", "__string_object", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cls_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cls_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_inst_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__inst_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cls_refs") - return ParseDirectiveSectionSwitch("__OBJC", "__cls_refs", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP | - MCSectionMachO::S_LITERAL_POINTERS, - 4); - if (IDVal == ".objc_message_refs") - return ParseDirectiveSectionSwitch("__OBJC", "__message_refs", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP | - MCSectionMachO::S_LITERAL_POINTERS, - 4); - if (IDVal == ".objc_symbols") - return ParseDirectiveSectionSwitch("__OBJC", "__symbols", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_category") - return ParseDirectiveSectionSwitch("__OBJC", "__category", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_class_vars") - return ParseDirectiveSectionSwitch("__OBJC", "__class_vars", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_instance_vars") - return ParseDirectiveSectionSwitch("__OBJC", "__instance_vars", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_module_info") - return ParseDirectiveSectionSwitch("__OBJC", "__module_info", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_class_names") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_meth_var_types") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_meth_var_names") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_selector_strs") - return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs", - MCSectionMachO::S_CSTRING_LITERALS); - - if (IDVal == ".tdata") - return 
ParseDirectiveSectionSwitch("__DATA", "__thread_data", - MCSectionMachO::S_THREAD_LOCAL_REGULAR); - if (IDVal == ".tlv") - return ParseDirectiveSectionSwitch("__DATA", "__thread_vars", - MCSectionMachO::S_THREAD_LOCAL_VARIABLES); - if (IDVal == ".thread_init_func") - return ParseDirectiveSectionSwitch("__DATA", "__thread_init", - MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); - // Assembler features if (IDVal == ".set") return ParseDirectiveSet(); @@ -756,36 +645,25 @@ bool AsmParser::ParseStatement() { return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); if (IDVal == ".weak_reference") return ParseDirectiveSymbolAttribute(MCSA_WeakReference); + if (IDVal == ".weak_def_can_be_hidden") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); if (IDVal == ".comm") return ParseDirectiveComm(/*IsLocal=*/false); if (IDVal == ".lcomm") return ParseDirectiveComm(/*IsLocal=*/true); - if (IDVal == ".zerofill") - return ParseDirectiveDarwinZerofill(); - if (IDVal == ".desc") - return ParseDirectiveDarwinSymbolDesc(); - if (IDVal == ".lsym") - return ParseDirectiveDarwinLsym(); - if (IDVal == ".tbss") - return ParseDirectiveDarwinTBSS(); - - if (IDVal == ".subsections_via_symbols") - return ParseDirectiveDarwinSubsectionsViaSymbols(); + if (IDVal == ".abort") return ParseDirectiveAbort(); if (IDVal == ".include") return ParseDirectiveInclude(); - if (IDVal == ".dump") - return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/true); - if (IDVal == ".load") - return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsLoad=*/false); - - // Look up the handler in the handler table, - bool(AsmParser::*Handler)(StringRef, SMLoc) = DirectiveMap[IDVal]; - if (Handler) - return (this->*Handler)(IDVal, IDLoc); - + + // Look up the handler in the handler table. + std::pair<MCAsmParserExtension*, DirectiveHandler> Handler = + DirectiveMap.lookup(IDVal); + if (Handler.first) + return (Handler.first->*Handler.second)(IDVal, IDLoc); + // Target hook for parsing target specific directives. if (!getTargetParser().ParseDirective(ID)) return false; @@ -839,7 +717,6 @@ bool AsmParser::ParseAssignment(const StringRef &Name) { SMLoc EqualLoc = Lexer.getLoc(); const MCExpr *Value; - SMLoc StartLoc = Lexer.getLoc(); if (ParseExpression(Value)) return true; @@ -867,7 +744,7 @@ bool AsmParser::ParseAssignment(const StringRef &Name) { return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + Name + "'"); } else - Sym = CreateSymbol(Name); + Sym = getContext().GetOrCreateSymbol(Name); // FIXME: Handle '.'. @@ -902,90 +779,15 @@ bool AsmParser::ParseDirectiveSet() { if (ParseIdentifier(Name)) return TokError("expected identifier after '.set' directive"); - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.set'"); Lex(); return ParseAssignment(Name); } -/// ParseDirectiveSection: -/// ::= .section identifier (',' identifier)* -/// FIXME: This should actually parse out the segment, section, attributes and -/// sizeof_stub fields. -bool AsmParser::ParseDirectiveDarwinSection() { - SMLoc Loc = Lexer.getLoc(); - - StringRef SectionName; - if (ParseIdentifier(SectionName)) - return Error(Loc, "expected identifier after '.section' directive"); - - // Verify there is a following comma. - if (!Lexer.is(AsmToken::Comma)) - return TokError("unexpected token in '.section' directive"); - - std::string SectionSpec = SectionName; - SectionSpec += ","; - - // Add all the tokens until the end of the line, ParseSectionSpecifier will - // handle this. 
- StringRef EOL = Lexer.LexUntilEndOfStatement(); - SectionSpec.append(EOL.begin(), EOL.end()); - - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.section' directive"); - Lex(); - - - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorStr = - MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, - TAA, StubSize); - - if (!ErrorStr.empty()) - return Error(Loc, ErrorStr.c_str()); - - // FIXME: Arch specific. - bool isText = Segment == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); - return false; -} - -/// ParseDirectiveSectionSwitch - -bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, - const char *Section, - unsigned TAA, unsigned Align, - unsigned StubSize) { - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in section switching directive"); - Lex(); - - // FIXME: Arch specific. - bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); - - // Set the implicit alignment, if any. - // - // FIXME: This isn't really what 'as' does; I think it just uses the implicit - // alignment on the section (e.g., if one manually inserts bytes into the - // section, then just issueing the section switch directive will not realign - // the section. However, this is arguably more reasonable behavior, and there - // is no good reason for someone to intentionally emit incorrectly sized - // values into the implicitly aligned sections. - if (Align) - Out.EmitValueToAlignment(Align, 0, 1, 0); - - return false; -} - bool AsmParser::ParseEscapedString(std::string &Data) { - assert(Lexer.is(AsmToken::String) && "Unexpected current token!"); + assert(getLexer().is(AsmToken::String) && "Unexpected current token!"); Data = ""; StringRef Str = getTok().getStringContents(); @@ -1045,25 +847,25 @@ bool AsmParser::ParseEscapedString(std::string &Data) { /// ParseDirectiveAscii: /// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.ascii' or '.asciz' directive"); - + std::string Data; if (ParseEscapedString(Data)) return true; - - Out.EmitBytes(Data, DEFAULT_ADDRSPACE); + + getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE); if (ZeroTerminated) - Out.EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); - + getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); + Lex(); - - if (Lexer.is(AsmToken::EndOfStatement)) + + if (getLexer().is(AsmToken::EndOfStatement)) break; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.ascii' or '.asciz' directive"); Lex(); } @@ -1076,24 +878,24 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { /// ParseDirectiveValue /// ::= (.byte | .short | ... 
) [ expression (, expression)* ] bool AsmParser::ParseDirectiveValue(unsigned Size) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - SMLoc ATTRIBUTE_UNUSED StartLoc = Lexer.getLoc(); + SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc(); if (ParseExpression(Value)) return true; // Special case constant expressions to match code generator. if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) - Out.EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); + getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); else - Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE); + getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE); - if (Lexer.is(AsmToken::EndOfStatement)) + if (getLexer().is(AsmToken::EndOfStatement)) break; // FIXME: Improve diagnostic. - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); } @@ -1111,18 +913,15 @@ bool AsmParser::ParseDirectiveSpace() { return true; int64_t FillExpr = 0; - bool HasFillExpr = false; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.space' directive"); Lex(); if (ParseAbsoluteExpression(FillExpr)) return true; - HasFillExpr = true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.space' directive"); } @@ -1132,7 +931,7 @@ bool AsmParser::ParseDirectiveSpace() { return TokError("invalid number of bytes in '.space' directive"); // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. - Out.EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); return false; } @@ -1144,7 +943,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(NumValues)) return true; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.fill' directive"); Lex(); @@ -1152,7 +951,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(FillSize)) return true; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.fill' directive"); Lex(); @@ -1160,7 +959,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(FillExpr)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.fill' directive"); Lex(); @@ -1169,7 +968,7 @@ bool AsmParser::ParseDirectiveFill() { return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); for (uint64_t i = 0, e = NumValues; i != e; ++i) - Out.EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); + getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); return false; } @@ -1178,21 +977,20 @@ bool AsmParser::ParseDirectiveFill() { /// ::= .org expression [ , expression ] bool AsmParser::ParseDirectiveOrg() { const MCExpr *Offset; - SMLoc StartLoc = Lexer.getLoc(); if (ParseExpression(Offset)) return true; // Parse optional fill expression. 
int64_t FillExpr = 0; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.org' directive"); Lex(); if (ParseAbsoluteExpression(FillExpr)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.org' directive"); } @@ -1200,7 +998,7 @@ bool AsmParser::ParseDirectiveOrg() { // FIXME: Only limited forms of relocatable expressions are accepted here, it // has to be relative to the current section. - Out.EmitValueToOffset(Offset, FillExpr); + getStreamer().EmitValueToOffset(Offset, FillExpr); return false; } @@ -1208,7 +1006,7 @@ bool AsmParser::ParseDirectiveOrg() { /// ParseDirectiveAlign /// ::= {.align, ...} expression [ , expression [ , expression ]] bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { - SMLoc AlignmentLoc = Lexer.getLoc(); + SMLoc AlignmentLoc = getLexer().getLoc(); int64_t Alignment; if (ParseAbsoluteExpression(Alignment)) return true; @@ -1217,30 +1015,30 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { bool HasFillExpr = false; int64_t FillExpr = 0; int64_t MaxBytesToFill = 0; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); // The fill expression can be omitted while specifying a maximum number of // alignment bytes, e.g: // .align 3,,4 - if (Lexer.isNot(AsmToken::Comma)) { + if (getLexer().isNot(AsmToken::Comma)) { HasFillExpr = true; if (ParseAbsoluteExpression(FillExpr)) return true; } - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); - MaxBytesLoc = Lexer.getLoc(); + MaxBytesLoc = getLexer().getLoc(); if (ParseAbsoluteExpression(MaxBytesToFill)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); } } @@ -1282,14 +1080,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { // FIXME: This should be using a target hook. bool UseCodeAlign = false; if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>( - Out.getCurrentSection())) + getStreamer().getCurrentSection())) UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && ValueSize == 1 && UseCodeAlign) { - Out.EmitCodeAlignment(Alignment, MaxBytesToFill); + getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill); } else { // FIXME: Target specific behavior about how the "extra" bytes are filled. - Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); + getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); } return false; @@ -1298,21 +1096,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { /// ParseDirectiveSymbolAttribute /// ::= { ".globl", ".weak", ... 
} [ identifier ( , identifier )* ] bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { StringRef Name; if (ParseIdentifier(Name)) return TokError("expected identifier in directive"); - MCSymbol *Sym = CreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - Out.EmitSymbolAttribute(Sym, Attr); + getStreamer().EmitSymbolAttribute(Sym, Attr); - if (Lexer.is(AsmToken::EndOfStatement)) + if (getLexer().is(AsmToken::EndOfStatement)) break; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); } @@ -1330,20 +1128,20 @@ bool AsmParser::ParseDirectiveELFType() { return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.type' directive"); Lex(); - if (Lexer.isNot(AsmToken::At)) + if (getLexer().isNot(AsmToken::At)) return TokError("expected '@' before type"); Lex(); StringRef Type; SMLoc TypeLoc; - TypeLoc = Lexer.getLoc(); + TypeLoc = getLexer().getLoc(); if (ParseIdentifier(Type)) return TokError("expected symbol type in directive"); @@ -1358,42 +1156,12 @@ bool AsmParser::ParseDirectiveELFType() { if (Attr == MCSA_Invalid) return Error(TypeLoc, "unsupported attribute in '.type' directive"); - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.type' directive"); Lex(); - Out.EmitSymbolAttribute(Sym, Attr); - - return false; -} - -/// ParseDirectiveDarwinSymbolDesc -/// ::= .desc identifier , expression -bool AsmParser::ParseDirectiveDarwinSymbolDesc() { - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.desc' directive"); - Lex(); - - SMLoc DescLoc = Lexer.getLoc(); - int64_t DescValue; - if (ParseAbsoluteExpression(DescValue)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.desc' directive"); - - Lex(); - - // Set the n_desc field of this Symbol to this DescValue - Out.EmitSymbolDesc(Sym, DescValue); + getStreamer().EmitSymbolAttribute(Sym, Attr); return false; } @@ -1401,28 +1169,28 @@ bool AsmParser::ParseDirectiveDarwinSymbolDesc() { /// ParseDirectiveComm /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] bool AsmParser::ParseDirectiveComm(bool IsLocal) { - SMLoc IDLoc = Lexer.getLoc(); + SMLoc IDLoc = getLexer().getLoc(); StringRef Name; if (ParseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. 
- MCSymbol *Sym = CreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); + SMLoc SizeLoc = getLexer().getLoc(); if (ParseAbsoluteExpression(Size)) return true; int64_t Pow2Alignment = 0; SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { + if (getLexer().is(AsmToken::Comma)) { Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); + Pow2AlignmentLoc = getLexer().getLoc(); if (ParseAbsoluteExpression(Pow2Alignment)) return true; @@ -1434,7 +1202,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { } } - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.comm' or '.lcomm' directive"); Lex(); @@ -1458,168 +1226,14 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { // '.lcomm' is equivalent to '.zerofill'. // Create the Symbol as a common or local common with Size and Pow2Alignment if (IsLocal) { - Out.EmitZerofill(Ctx.getMachOSection("__DATA", "__bss", - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), - Sym, Size, 1 << Pow2Alignment); + getStreamer().EmitZerofill(Ctx.getMachOSection( + "__DATA", "__bss", MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); return false; } - Out.EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); - return false; -} - -/// ParseDirectiveDarwinZerofill -/// ::= .zerofill segname , sectname [, identifier , size_expression [ -/// , align_expression ]] -bool AsmParser::ParseDirectiveDarwinZerofill() { - StringRef Segment; - if (ParseIdentifier(Segment)) - return TokError("expected segment name after '.zerofill' directive"); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - StringRef Section; - if (ParseIdentifier(Section)) - return TokError("expected section name after comma in '.zerofill' " - "directive"); - - // If this is the end of the line all that was wanted was to create the - // the section but with no symbol. - if (Lexer.is(AsmToken::EndOfStatement)) { - // Create the zerofill section but no symbol - Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS())); - return false; - } - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - SMLoc IDLoc = Lexer.getLoc(); - StringRef IDStr; - if (ParseIdentifier(IDStr)) - return TokError("expected identifier in directive"); - - // handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(IDStr); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Size)) - return true; - - int64_t Pow2Alignment = 0; - SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { - Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) - return true; - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.zerofill' directive"); - - Lex(); - - if (Size < 0) - return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " - "than zero"); - - // NOTE: The alignment in the directive is a power of 2 value, the assembler - // may internally end up wanting an alignment in bytes. - // FIXME: Diagnose overflow. 
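Every allocation directive in this region (.comm/.lcomm above, and the .zerofill/.tbss bodies being moved out to DarwinAsmParser.cpp) takes its alignment as a power-of-two exponent and converts it to bytes with 1 << Pow2Alignment; the recurring "FIXME: Diagnose overflow" notes that a large exponent silently shifts out of range. A small sketch of the conversion with that check added; alignFromPow2 is a hypothetical helper, not anything the patch defines:

    #include <cstdio>

    // Convert a power-of-2 alignment operand into a byte count, rejecting
    // negatives and exponents that would overflow a 32-bit value.
    static bool alignFromPow2(long long Pow2, unsigned &Bytes) {
      if (Pow2 < 0 || Pow2 > 30)
        return false;
      Bytes = 1u << (unsigned) Pow2;
      return true;
    }

    int main() {
      unsigned Bytes;
      if (alignFromPow2(3, Bytes))               // e.g. '.comm sym,16,3'
        std::printf("align %u bytes\n", Bytes);  // prints "align 8 bytes"
      return 0;
    }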
- if (Pow2Alignment < 0) - return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " - "can't be less than zero"); - - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); - - // Create the zerofill Symbol with Size and Pow2Alignment - // - // FIXME: Arch specific. - Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), - Sym, Size, 1 << Pow2Alignment); - - return false; -} - -/// ParseDirectiveDarwinTBSS -/// ::= .tbss identifier, size, align -bool AsmParser::ParseDirectiveDarwinTBSS() { - SMLoc IDLoc = Lexer.getLoc(); - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Size)) - return true; - - int64_t Pow2Alignment = 0; - SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { - Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) - return true; - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.tbss' directive"); - - Lex(); - - if (Size < 0) - return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than" - "zero"); - - // FIXME: Diagnose overflow. - if (Pow2Alignment < 0) - return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" - "than zero"); - - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); - - Out.EmitTBSSSymbol(Ctx.getMachOSection("__DATA", "__thread_bss", - MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, - 0, SectionKind::getThreadBSS()), - Sym, Size, 1 << Pow2Alignment); - - return false; -} - -/// ParseDirectiveDarwinSubsectionsViaSymbols -/// ::= .subsections_via_symbols -bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.subsections_via_symbols' directive"); - - Lex(); - - Out.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - + getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); return false; } @@ -1627,11 +1241,11 @@ bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { /// ::= .abort [ "abort_string" ] bool AsmParser::ParseDirectiveAbort() { // FIXME: Use loc from directive. - SMLoc Loc = Lexer.getLoc(); + SMLoc Loc = getLexer().getLoc(); StringRef Str = ""; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.abort' directive"); Str = getTok().getString(); @@ -1639,7 +1253,7 @@ bool AsmParser::ParseDirectiveAbort() { Lex(); } - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.abort' directive"); Lex(); @@ -1653,48 +1267,17 @@ bool AsmParser::ParseDirectiveAbort() { return false; } -/// ParseDirectiveLsym -/// ::= .lsym identifier , expression -bool AsmParser::ParseDirectiveDarwinLsym() { - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.lsym' directive"); - Lex(); - - const MCExpr *Value; - SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Value)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.lsym' directive"); - - Lex(); - - // We don't currently support this directive. - // - // FIXME: Diagnostic location! - (void) Sym; - return TokError("directive '.lsym' is unsupported"); -} - /// ParseDirectiveInclude /// ::= .include "filename" bool AsmParser::ParseDirectiveInclude() { - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.include' directive"); std::string Filename = getTok().getString(); - SMLoc IncludeLoc = Lexer.getLoc(); + SMLoc IncludeLoc = getLexer().getLoc(); Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.include' directive"); // Strip the quotes. @@ -1712,29 +1295,6 @@ bool AsmParser::ParseDirectiveInclude() { return false; } -/// ParseDirectiveDarwinDumpOrLoad -/// ::= ( .dump | .load ) "filename" -bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) { - if (Lexer.isNot(AsmToken::String)) - return TokError("expected string in '.dump' or '.load' directive"); - - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.dump' or '.load' directive"); - - Lex(); - - // FIXME: If/when .dump and .load are implemented they will be done in the - // the assembly parser and not have any need for an MCStreamer API. - if (IsDump) - Warning(IDLoc, "ignoring directive .dump for now"); - else - Warning(IDLoc, "ignoring directive .load for now"); - - return false; -} - /// ParseDirectiveIf /// ::= .if expression bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { @@ -1748,7 +1308,7 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { if (ParseAbsoluteExpression(ExprValue)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.if' directive"); Lex(); @@ -1781,7 +1341,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { if (ParseAbsoluteExpression(ExprValue)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.elseif' directive"); Lex(); @@ -1795,7 +1355,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { /// ParseDirectiveElse /// ::= .else bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.else' directive"); Lex(); @@ -1819,7 +1379,7 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { /// ParseDirectiveEndIf /// ::= .endif bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.endif' directive"); Lex(); @@ -1838,40 +1398,40 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { /// ParseDirectiveFile /// ::= .file [number] string -bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { +bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { // FIXME: I'm not sure what this is. 
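The .if/.elseif/.else/.endif handlers in this hunk all pivot on TheCondState: its Ignore flag suppresses statement emission (see the TheCondState.Ignore test back in ParseStatement), and nesting implies a stack of saved states. The diff does not show the struct's full definition, so the following is only a guessed model with illustrative field names, not the actual layout:

    #include <cstdio>
    #include <vector>

    struct CondState {
      bool Active;    // is this branch emitting statements?
      bool EverTrue;  // has any branch of this .if chain been taken yet?
      bool ElseSeen;  // .else already consumed (a second one is an error)
    };

    int main() {
      std::vector<CondState> Stack;

      CondState S = { false, false, false };  // .if 0  -> not emitting
      Stack.push_back(S);

      if (!Stack.back().EverTrue) {           // .elseif 1 -> first true branch
        Stack.back().Active = true;
        Stack.back().EverTrue = true;
      }

      std::printf("emitting: %d\n", Stack.back().Active);  // prints 1

      Stack.pop_back();                       // .endif restores the outer state
      return 0;
    }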
int64_t FileNumber = -1; - if (Lexer.is(AsmToken::Integer)) { + if (getLexer().is(AsmToken::Integer)) { FileNumber = getTok().getIntVal(); Lex(); - + if (FileNumber < 1) return TokError("file number less than one"); } - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::String)) return TokError("unexpected token in '.file' directive"); - + StringRef Filename = getTok().getString(); Filename = Filename.substr(1, Filename.size()-2); Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.file' directive"); if (FileNumber == -1) - Out.EmitFileDirective(Filename); + getStreamer().EmitFileDirective(Filename); else - Out.EmitDwarfFileDirective(FileNumber, Filename); - + getStreamer().EmitDwarfFileDirective(FileNumber, Filename); + return false; } /// ParseDirectiveLine /// ::= .line [number] -bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) +bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.line' directive"); int64_t LineNumber = getTok().getIntVal(); @@ -1881,8 +1441,8 @@ bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { // FIXME: Do something with the .line. } - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.file' directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.line' directive"); return false; } @@ -1890,8 +1450,8 @@ bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { /// ParseDirectiveLoc /// ::= .loc number [number [number]] -bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::Integer)) +bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.loc' directive"); // FIXME: What are these fields? @@ -1900,16 +1460,16 @@ bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { // FIXME: Validate file. 
Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.loc' directive"); int64_t Param2 = getTok().getIntVal(); (void) Param2; Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.loc' directive"); int64_t Param3 = getTok().getIntVal(); @@ -1920,7 +1480,7 @@ bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { } } - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.file' directive"); return false; diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt index a5c0818..25a7bf4 100644 --- a/lib/MC/MCParser/CMakeLists.txt +++ b/lib/MC/MCParser/CMakeLists.txt @@ -1,7 +1,10 @@ add_llvm_library(LLVMMCParser AsmLexer.cpp AsmParser.cpp + DarwinAsmParser.cpp + ELFAsmParser.cpp MCAsmLexer.cpp MCAsmParser.cpp + MCAsmParserExtension.cpp TargetAsmParser.cpp ) diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp new file mode 100644 index 0000000..7d8639e --- /dev/null +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -0,0 +1,758 @@ +//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +using namespace llvm; + +namespace { + +/// \brief Implementation of directive handling which is shared across all +/// Darwin targets. +class DarwinAsmParser : public MCAsmParserExtension { + bool ParseSectionSwitch(const char *Segment, const char *Section, + unsigned TAA = 0, unsigned ImplicitAlign = 0, + unsigned StubSize = 0); + +public: + DarwinAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. 
+ this->MCAsmParserExtension::Initialize(Parser); + + Parser.AddDirectiveHandler(this, ".desc", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDesc)); + Parser.AddDirectiveHandler(this, ".lsym", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveLsym)); + Parser.AddDirectiveHandler(this, ".subsections_via_symbols", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols)); + Parser.AddDirectiveHandler(this, ".dump", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDumpOrLoad)); + Parser.AddDirectiveHandler(this, ".load", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDumpOrLoad)); + Parser.AddDirectiveHandler(this, ".section", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSection)); + Parser.AddDirectiveHandler(this, ".secure_log_unique", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSecureLogUnique)); + Parser.AddDirectiveHandler(this, ".secure_log_reset", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSecureLogReset)); + Parser.AddDirectiveHandler(this, ".tbss", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveTBSS)); + Parser.AddDirectiveHandler(this, ".zerofill", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveZerofill)); + + // Special section directives. + Parser.AddDirectiveHandler(this, ".const", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConst)); + Parser.AddDirectiveHandler(this, ".const_data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConstData)); + Parser.AddDirectiveHandler(this, ".constructor", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConstructor)); + Parser.AddDirectiveHandler(this, ".cstring", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveCString)); + Parser.AddDirectiveHandler(this, ".data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveData)); + Parser.AddDirectiveHandler(this, ".destructor", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveDestructor)); + Parser.AddDirectiveHandler(this, ".dyld", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveDyld)); + Parser.AddDirectiveHandler(this, ".fvmlib_init0", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveFVMLibInit0)); + Parser.AddDirectiveHandler(this, ".fvmlib_init1", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveFVMLibInit1)); + Parser.AddDirectiveHandler(this, ".lazy_symbol_pointer", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers)); + Parser.AddDirectiveHandler(this, ".literal16", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral16)); + Parser.AddDirectiveHandler(this, ".literal4", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral4)); + Parser.AddDirectiveHandler(this, ".literal8", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral8)); + Parser.AddDirectiveHandler(this, ".mod_init_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveModInitFunc)); + Parser.AddDirectiveHandler(this, ".mod_term_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveModTermFunc)); + Parser.AddDirectiveHandler(this, ".non_lazy_symbol_pointer", + MCAsmParser::DirectiveHandler( + 
&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers)); + Parser.AddDirectiveHandler(this, ".objc_cat_cls_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth)); + Parser.AddDirectiveHandler(this, ".objc_cat_inst_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth)); + Parser.AddDirectiveHandler(this, ".objc_category", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCategory)); + Parser.AddDirectiveHandler(this, ".objc_class", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClass)); + Parser.AddDirectiveHandler(this, ".objc_class_names", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClassNames)); + Parser.AddDirectiveHandler(this, ".objc_class_vars", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClassVars)); + Parser.AddDirectiveHandler(this, ".objc_cls_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClsMeth)); + Parser.AddDirectiveHandler(this, ".objc_cls_refs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClsRefs)); + Parser.AddDirectiveHandler(this, ".objc_inst_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCInstMeth)); + Parser.AddDirectiveHandler(this, ".objc_instance_vars", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars)); + Parser.AddDirectiveHandler(this, ".objc_message_refs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs)); + Parser.AddDirectiveHandler(this, ".objc_meta_class", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMetaClass)); + Parser.AddDirectiveHandler(this, ".objc_meth_var_names", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames)); + Parser.AddDirectiveHandler(this, ".objc_meth_var_types", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes)); + Parser.AddDirectiveHandler(this, ".objc_module_info", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo)); + Parser.AddDirectiveHandler(this, ".objc_protocol", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCProtocol)); + Parser.AddDirectiveHandler(this, ".objc_selector_strs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs)); + Parser.AddDirectiveHandler(this, ".objc_string_object", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCStringObject)); + Parser.AddDirectiveHandler(this, ".objc_symbols", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCSymbols)); + Parser.AddDirectiveHandler(this, ".picsymbol_stub", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectivePICSymbolStub)); + Parser.AddDirectiveHandler(this, ".static_const", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveStaticConst)); + Parser.AddDirectiveHandler(this, ".static_data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveStaticData)); + Parser.AddDirectiveHandler(this, ".symbol_stub", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveSymbolStub)); + Parser.AddDirectiveHandler(this, ".tdata", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveTData)); + 
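Each registration in this long run repeats the same AddDirectiveHandler-plus-DirectiveHandler-cast shape around a (directive, segment, section, attributes) tuple. The patch keeps every call explicit, but for comparison, here is a sketch of how a static table could drive the same wiring; SectionEntry and the three sample rows are invented for illustration:

    #include <cstdio>
    #include <map>
    #include <string>

    struct SectionEntry {
      const char *Directive, *Segment, *Section;
    };

    static const SectionEntry Entries[] = {
      { ".data",  "__DATA", "__data" },
      { ".const", "__TEXT", "__const" },
      { ".tdata", "__DATA", "__thread_data" },
    };

    int main() {
      // One loop stands in for N hand-written registrations.
      std::map<std::string, const SectionEntry *> Handlers;
      for (unsigned i = 0; i != sizeof(Entries) / sizeof(Entries[0]); ++i)
        Handlers[Entries[i].Directive] = &Entries[i];

      const SectionEntry *E = Handlers[".tdata"];
      std::printf("%s -> %s,%s\n", E->Directive, E->Segment, E->Section);
      return 0;
    }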
Parser.AddDirectiveHandler(this, ".text", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveText)); + Parser.AddDirectiveHandler(this, ".thread_init_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveThreadInitFunc)); + Parser.AddDirectiveHandler(this, ".tlv", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveTLV)); + } + + bool ParseDirectiveDesc(StringRef, SMLoc); + bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); + bool ParseDirectiveLsym(StringRef, SMLoc); + bool ParseDirectiveSection(); + bool ParseDirectiveSecureLogReset(StringRef, SMLoc); + bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); + bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); + bool ParseDirectiveTBSS(StringRef, SMLoc); + bool ParseDirectiveZerofill(StringRef, SMLoc); + + // Named Section Directive + bool ParseSectionDirectiveConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__const"); + } + bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__static_const"); + } + bool ParseSectionDirectiveCString(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, 4); + } + bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, 8); + } + bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__literal16", + MCSectionMachO::S_16BYTE_LITERALS, 16); + } + bool ParseSectionDirectiveConstructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__constructor"); + } + bool ParseSectionDirectiveDestructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__destructor"); + } + bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init0"); + } + bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init1"); + } + bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__symbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + // FIXME: Different on PPC and ARM. 
+ 0, 16); + } + bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__picsymbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26); + } + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__data"); + } + bool ParseSectionDirectiveStaticData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__static_data"); + } + bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveDyld(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__dyld"); + } + bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveConstData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__const"); + } + bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__meta_class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__protocol", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__string_object", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__message_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__symbols", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__category", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool 
ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__instance_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__module_info", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__selector_strs", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveTData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + } + bool ParseSectionDirectiveTLV(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES); + } + bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } + +}; + +} + +bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, + const char *Section, + unsigned TAA, unsigned Align, + unsigned StubSize) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + // FIXME: Arch specific. + bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); + + // Set the implicit alignment, if any. + // + // FIXME: This isn't really what 'as' does; I think it just uses the implicit + // alignment on the section (e.g., if one manually inserts bytes into the + // section, then just issuing the section switch directive will not realign + // the section). However, this is arguably more reasonable behavior, and there + // is no good reason for someone to intentionally emit incorrectly sized + // values into the implicitly aligned sections. + if (Align) + getStreamer().EmitValueToAlignment(Align, 0, 1, 0); + + return false; +} + +/// ParseDirectiveDesc +/// ::= .desc identifier , expression +bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol.
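Aside: every wrapper above has the same shape, the directive name selects a fixed (segment, section, type/attribute, alignment) tuple and ParseSectionSwitch does the rest, with dispatch through the AddDirectiveHandler table registered in Initialize. A minimal standalone sketch of that table-driven pattern; MiniParser and its members are hypothetical stand-ins, not the MC API:

    #include <cstdio>
    #include <map>
    #include <string>

    // Hypothetical stand-in for the directive-handler table above: a
    // directive name maps to a member function, mirroring AddDirectiveHandler.
    class MiniParser {
      typedef bool (MiniParser::*Handler)();
      std::map<std::string, Handler> Handlers;

    public:
      void AddDirectiveHandler(const std::string &Directive, Handler H) {
        Handlers[Directive] = H;
      }

      bool ParseSectionSwitch(const char *Segment, const char *Section,
                              unsigned Align) {
        std::printf("switch to %s,%s (align %u)\n", Segment, Section, Align);
        return false; // false == success, as in the wrappers above
      }

      // Mirrors ParseSectionDirectiveLiteral8: one fixed tuple per directive.
      bool ParseSectionDirectiveLiteral8() {
        return ParseSectionSwitch("__TEXT", "__literal8", 8);
      }

      bool Dispatch(const std::string &Directive) {
        return (this->*Handlers[Directive])();
      }
    };

    int main() {
      MiniParser P;
      P.AddDirectiveHandler(".literal8",
                            &MiniParser::ParseSectionDirectiveLiteral8);
      return P.Dispatch(".literal8") ? 1 : 0;
    }

Below, the .desc parse resumes by materializing the identifier it just read as the key symbol.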
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.desc' directive"); + Lex(); + + int64_t DescValue; + if (getParser().ParseAbsoluteExpression(DescValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.desc' directive"); + + Lex(); + + // Set the n_desc field of this Symbol to this DescValue + getStreamer().EmitSymbolDesc(Sym, DescValue); + + return false; +} + +/// ParseDirectiveDumpOrLoad +/// ::= ( .dump | .load ) "filename" +bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive, + SMLoc IDLoc) { + bool IsDump = Directive == ".dump"; + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.dump' or '.load' directive"); + + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.dump' or '.load' directive"); + + Lex(); + + // FIXME: If/when .dump and .load are implemented they will be done in + // the assembly parser and not have any need for an MCStreamer API. + if (IsDump) + Warning(IDLoc, "ignoring directive .dump for now"); + else + Warning(IDLoc, "ignoring directive .load for now"); + + return false; +} + +/// ParseDirectiveLsym +/// ::= .lsym identifier , expression +bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.lsym' directive"); + Lex(); + + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.lsym' directive"); + + Lex(); + + // We don't currently support this directive. + // + // FIXME: Diagnostic location! + (void) Sym; + return TokError("directive '.lsym' is unsupported"); +} + +/// ParseDirectiveSection: +/// ::= .section identifier (',' identifier)* +bool DarwinAsmParser::ParseDirectiveSection() { + SMLoc Loc = getLexer().getLoc(); + + StringRef SectionName; + if (getParser().ParseIdentifier(SectionName)) + return Error(Loc, "expected identifier after '.section' directive"); + + // Verify there is a following comma. + if (!getLexer().is(AsmToken::Comma)) + return TokError("unexpected token in '.section' directive"); + + std::string SectionSpec = SectionName; + SectionSpec += ","; + + // Add all the tokens until the end of the line; ParseSectionSpecifier will + // handle this. + StringRef EOL = getLexer().LexUntilEndOfStatement(); + SectionSpec.append(EOL.begin(), EOL.end()); + + Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.section' directive"); + Lex(); + + + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorStr = + MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, + TAA, StubSize); + + if (!ErrorStr.empty()) + return Error(Loc, ErrorStr.c_str()); + + // FIXME: Arch specific. + bool isText = Segment == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? 
SectionKind::getText() + : SectionKind::getDataRel())); + return false; +} + +/// ParseDirectiveSecureLogUnique +/// ::= .secure_log_unique "log message" +bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { + std::string LogMessage; + + if (getLexer().isNot(AsmToken::String)) + LogMessage = ""; + else { + LogMessage = getTok().getString(); + Lex(); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_unique' directive"); + + if (getContext().getSecureLogUsed()) + return Error(IDLoc, ".secure_log_unique specified multiple times"); + + char *SecureLogFile = getContext().getSecureLogFile(); + if (SecureLogFile == NULL) + return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE " + "environment variable unset."); + + raw_ostream *OS = getContext().getSecureLog(); + if (OS == NULL) { + std::string Err; + OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append); + if (!Err.empty()) { + delete OS; + return Error(IDLoc, Twine("can't open secure log file: ") + + SecureLogFile + " (" + Err + ")"); + } + getContext().setSecureLog(OS); + } + + int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc); + *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":" + << LogMessage + "\n"; + + getContext().setSecureLogUsed(true); + + return false; +} + +/// ParseDirectiveSecureLogReset +/// ::= .secure_log_reset +bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_reset' directive"); + + Lex(); + + getContext().setSecureLogUsed(false); + + return false; +} + +/// ParseDirectiveSubsectionsViaSymbols +/// ::= .subsections_via_symbols +bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.subsections_via_symbols' directive"); + + Lex(); + + getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); + + return false; +} + +/// ParseDirectiveTBSS +/// ::= .tbss identifier, size, align +bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.tbss' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than " + "zero"); + + // FIXME: Diagnose overflow. 
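Aside: the final operand of .tbss (and of .zerofill below) is a power-of-2 exponent, while EmitTBSSSymbol and EmitZerofill take a byte alignment, hence the 1 << Pow2Alignment at the call sites. A small illustration of that conversion; the overflow that the FIXME above alludes to would be an exponent of 64 or more in this 64-bit sketch:

    #include <cassert>
    #include <cstdint>

    // Directive alignments arrive as a power-of-2 exponent; the streamer
    // wants bytes. Mirrors the 1 << Pow2Alignment conversion in the parser.
    static uint64_t byteAlignment(int64_t Pow2Alignment) {
      assert(Pow2Alignment >= 0 && "parser rejects negative exponents");
      assert(Pow2Alignment < 64 && "would overflow a 64-bit byte alignment");
      return uint64_t(1) << Pow2Alignment;
    }

    int main() {
      // '.tbss _x, 4, 2' => Size == 4, Pow2Alignment == 2 => 4-byte alignment.
      assert(byteAlignment(2) == 4);
      return 0;
    }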
+ if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less " + "than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + getStreamer().EmitTBSSSymbol(getContext().getMachOSection( + "__DATA", "__thread_bss", + MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, + 0, SectionKind::getThreadBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +/// ParseDirectiveZerofill +/// ::= .zerofill segname , sectname [, identifier , size_expression [ +/// , align_expression ]] +bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { + StringRef Segment; + if (getParser().ParseIdentifier(Segment)) + return TokError("expected segment name after '.zerofill' directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + StringRef Section; + if (getParser().ParseIdentifier(Section)) + return TokError("expected section name after comma in '.zerofill' " + "directive"); + + // If this is the end of the line all that was wanted was to create + // the section but with no symbol. + if (getLexer().is(AsmToken::EndOfStatement)) { + // Create the zerofill section but no symbol + getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS())); + return false; + } + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + SMLoc IDLoc = getLexer().getLoc(); + StringRef IDStr; + if (getParser().ParseIdentifier(IDStr)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.zerofill' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " + "than zero"); + + // NOTE: The alignment in the directive is a power of 2 value; the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " + "can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Create the zerofill Symbol with Size and Pow2Alignment + // + // FIXME: Arch specific. 
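Aside: both .tbss and .zerofill funnel the parsed identifier through GetOrCreateSymbol, which uniques symbols by name; that is what lets the isUndefined() check above reject a second definition before the EmitZerofill call below runs. A toy model of that uniquing-plus-redefinition check, with stand-in types rather than MCContext and MCSymbol:

    #include <cassert>
    #include <map>
    #include <string>

    // Toy model: the context uniques symbols by name, so a second .zerofill
    // naming the same identifier sees the already-defined symbol object.
    struct Symbol {
      bool Defined;
      Symbol() : Defined(false) {}
    };

    struct Context {
      std::map<std::string, Symbol> Symbols;
      Symbol *GetOrCreateSymbol(const std::string &Name) {
        return &Symbols[Name]; // same name always yields the same object
      }
    };

    int main() {
      Context Ctx;
      Symbol *First = Ctx.GetOrCreateSymbol("_buf");
      First->Defined = true; // the first .zerofill defines it
      Symbol *Again = Ctx.GetOrCreateSymbol("_buf");
      assert(Again == First && Again->Defined); // parser diagnoses here
      return 0;
    }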
+ getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createDarwinAsmParser() { + return new DarwinAsmParser; +} + +} diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp new file mode 100644 index 0000000..7a54dd3 --- /dev/null +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -0,0 +1,68 @@ +//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +using namespace llvm; + +namespace { + +class ELFAsmParser : public MCAsmParserExtension { + bool ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind); + +public: + ELFAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + Parser.AddDirectiveHandler(this, ".data", MCAsmParser::DirectiveHandler( + &ELFAsmParser::ParseSectionDirectiveData)); + Parser.AddDirectiveHandler(this, ".text", MCAsmParser::DirectiveHandler( + &ELFAsmParser::ParseSectionDirectiveText)); + } + + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, SectionKind::getText()); + } +}; + +} + +bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + getStreamer().SwitchSection(getContext().getELFSection( + Section, Type, Flags, Kind)); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createELFAsmParser() { + return new ELFAsmParser; +} + +} diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp index e5b2955..dceece7 100644 --- a/lib/MC/MCParser/MCAsmLexer.cpp +++ b/lib/MC/MCParser/MCAsmLexer.cpp @@ -12,12 +12,16 @@ using namespace llvm; -MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()) { +MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) { } MCAsmLexer::~MCAsmLexer() { } +SMLoc MCAsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + SMLoc AsmToken::getLoc() const { return SMLoc::getFromPointer(Str.data()); } diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index b8c2054..bee3064 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" @@ -23,6 
+24,11 @@ const AsmToken &MCAsmParser::getTok() { return getLexer().getTok(); } +bool MCAsmParser::TokError(const char *Msg) { + Error(getLexer().getLoc(), Msg); + return true; +} + bool MCAsmParser::ParseExpression(const MCExpr *&Res) { SMLoc L; return ParseExpression(Res, L); diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp new file mode 100644 index 0000000..c30d306 --- /dev/null +++ b/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -0,0 +1,21 @@ +//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +using namespace llvm; + +MCAsmParserExtension::MCAsmParserExtension() { +} + +MCAsmParserExtension::~MCAsmParserExtension() { +} + +void MCAsmParserExtension::Initialize(MCAsmParser &Parser) { + this->Parser = &Parser; +} diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index d57bb0c..eb53160 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -44,28 +44,28 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << 'w'; else OS << 'r'; - if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE) + if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) OS << 'n'; OS << "\"\n"; - if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) { + if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { switch (Selection) { - case IMAGE_COMDAT_SELECT_NODUPLICATES: + case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: OS << "\t.linkonce one_only\n"; break; - case IMAGE_COMDAT_SELECT_ANY: + case COFF::IMAGE_COMDAT_SELECT_ANY: OS << "\t.linkonce discard\n"; break; - case IMAGE_COMDAT_SELECT_SAME_SIZE: + case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: OS << "\t.linkonce same_size\n"; break; - case IMAGE_COMDAT_SELECT_EXACT_MATCH: + case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: OS << "\t.linkonce same_contents\n"; break; // NOTE: as of binutils 2.20, there is no way to specify select largest // with the .linkonce directive. For now, we treat it as an invalid // comdat selection value. 
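Aside, stepping back to the MCAsmParser hunk above: the new TokError helper captures the error-handling idiom used throughout both parser extensions, namely report a diagnostic at the current token's location and return true so the caller can bail out in a single statement. A compact sketch of that shape with stand-in types (the real Error routes through SourceMgr diagnostics):

    #include <cstdio>

    // Stand-in sketch of the 'diagnose and return true' convention.
    struct MiniAsmParser {
      unsigned CurLoc; // pretend location of the current token
      MiniAsmParser() : CurLoc(0) {}

      bool Error(unsigned Loc, const char *Msg) {
        std::fprintf(stderr, "error at %u: %s\n", Loc, Msg);
        return true; // true == failure throughout these parsers
      }

      bool TokError(const char *Msg) { return Error(CurLoc, Msg); }

      bool ParseDirectiveFoo(bool SawComma) {
        if (!SawComma)
          return TokError("unexpected token in '.foo' directive");
        return false; // parsed OK
      }
    };

    int main() { return MiniAsmParser().ParseDirectiveFoo(true) ? 1 : 0; }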
- case IMAGE_COMDAT_SELECT_LARGEST: + case COFF::IMAGE_COMDAT_SELECT_LARGEST: // OS << "\t.linkonce largest\n"; // break; default: diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 3207e99..7ca0951 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -33,6 +33,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { default: llvm_unreachable("invalid fixup kind!"); case X86::reloc_pcrel_1byte: case FK_Data_1: return 0; + case X86::reloc_pcrel_2byte: case FK_Data_2: return 1; case X86::reloc_pcrel_4byte: case X86::reloc_riprel_4byte: @@ -47,6 +48,7 @@ static bool isFixupKindPCRel(unsigned Kind) { default: return false; case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_2byte: case X86::reloc_pcrel_4byte: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: @@ -738,6 +740,51 @@ public: Relocations[Fragment->getParent()].push_back(MRE); } + void RecordTLVPRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !Is64Bit && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend + // is zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + Layout.getFragmentAddress(Fragment) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) + + Target.getConstant()); + FixedValue += 1 << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (RIT_TLV << 28)); // Type + Relocations[Fragment->getParent()].push_back(MRE); + } + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { @@ -749,6 +796,12 @@ public: unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + // If this is a difference or a defined symbol plus an offset, then we need // a scattered relocation entry. // Differences always require scattered relocations. @@ -772,7 +825,6 @@ public: // See <reloc.h>. uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - uint32_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; unsigned Type = 0; @@ -783,7 +835,6 @@ public: // FIXME: Currently, these are never generated (see code below). I cannot // find a case where they are actually emitted. 
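Aside: in the picbase branch of RecordTLVPRelocation above, the stored FixedValue is the distance from the subtracted symbol to the end of the fixup field, which is why 1 << Log2Size is added on top of the plain address difference. A worked example of that arithmetic with invented addresses:

    #include <cassert>
    #include <cstdint>

    // Worked instance of the TLVP addend computation; numbers invented.
    int main() {
      uint64_t FragmentAddr = 0x1000, FixupOffset = 0x10;
      uint64_t PicbaseAddr = 0x0f00; // layout address of SymB
      int64_t Constant = 0;          // Target.getConstant()
      unsigned Log2Size = 2;         // 4-byte fixup

      uint64_t FixupAddress = FragmentAddr + FixupOffset;          // 0x1010
      uint64_t FixedValue = FixupAddress - PicbaseAddr + Constant; // 0x110
      FixedValue += uint64_t(1) << Log2Size;                       // + 4
      assert(FixedValue == 0x114);
      return 0;
    }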
Type = RIT_Vanilla; - Value = 0; } else { // Check whether we need an external or internal relocation. if (doesSymbolRequireExternRelocation(SD)) { @@ -794,11 +845,9 @@ public: // undefined. This occurs with weak definitions, for example. if (!SD->Symbol->isUndefined()) FixedValue -= Layout.getSymbolAddress(SD); - Value = 0; } else { // The index is the section ordinal (1-based). Index = SD->getFragment()->getParent()->getOrdinal() + 1; - Value = Layout.getSymbolAddress(SD); } Type = RIT_Vanilla; @@ -898,7 +947,7 @@ public: const MCSymbol &Symbol = it->getSymbol(); // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it)) + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) continue; if (!it->isExternal() && !Symbol.isUndefined()) @@ -934,7 +983,7 @@ public: const MCSymbol &Symbol = it->getSymbol(); // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it)) + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) continue; if (it->isExternal() || Symbol.isUndefined()) diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp new file mode 100644 index 0000000..6804766 --- /dev/null +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -0,0 +1,71 @@ +//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file writer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFObjectWriter" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +using namespace llvm; + +namespace { + + class WinCOFFObjectWriter : public MCObjectWriter { + public: + WinCOFFObjectWriter(raw_ostream &OS); + + // MCObjectWriter interface implementation. 
+ + void ExecutePostLayoutBinding(MCAssembler &Asm); + + void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout); + }; +} + +WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS) + : MCObjectWriter(OS, true) { +} + +//////////////////////////////////////////////////////////////////////////////// +// MCObjectWriter interface implementations + +void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { +} + +void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { +} + +void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { +} + +//------------------------------------------------------------------------------ +// WinCOFFObjectWriter factory function + +namespace llvm { + MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS) { + return new WinCOFFObjectWriter(OS); + } +} diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp new file mode 100644 index 0000000..1030cdb --- /dev/null +++ b/lib/MC/WinCOFFStreamer.cpp @@ -0,0 +1,198 @@ +//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file streamer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFStreamer" + +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define dbg_notimpl(x) \ + do { dbgs() << "not implemented, " << __FUNCTION__ << " (" << x << ")"; \ + abort(); } while (false) + +namespace { +class WinCOFFStreamer : public MCObjectStreamer { +public: + WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS); + + // MCStreamer interface + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment); + virtual void 
EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, unsigned MaxBytesToEmit); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit); + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value); + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + virtual void EmitInstruction(const MCInst &Instruction); + virtual void Finish(); +}; +} // end anonymous namespace. + +WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) + : MCObjectStreamer(Context, TAB, OS, &CE) { +} + +// MCStreamer interface + +void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { +} + +void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + dbg_notimpl("Flag = " << Flag); +} + +void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { +} + +void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { +} + +void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + dbg_notimpl("Symbol = " << Symbol->getName() << ", DescValue = " << DescValue); +} + +void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { +} + +void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { +} + +void WinCOFFStreamer::EmitCOFFSymbolType(int Type) { +} + +void WinCOFFStreamer::EndCOFFSymbolDef() { +} + +void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + dbg_notimpl("Symbol = " << Symbol->getName() << ", Value = " << *Value); +} + +void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { +} + +void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { +} + +void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + MCSectionCOFF const *SectionCOFF = + static_cast<MCSectionCOFF const *>(Section); + + dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << + Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " + << ByteAlignment); +} + +void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + MCSectionCOFF const *SectionCOFF = + static_cast<MCSectionCOFF const *>(Section); + + dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << + Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " + << ByteAlignment); +} + +void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { +} + +void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { +} + +void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) { + dbg_notimpl("Value = '" << *Value); +} + +void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { +} + +void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { +} + +void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + dbg_notimpl("Offset = '" << 
*Offset << "', Value = " << Value); +} + +void WinCOFFStreamer::EmitFileDirective(StringRef Filename) { + // Ignore for now, linkers don't care, and proper debug + // info will be a much larger effort. +} + +void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Filename) { + dbg_notimpl("FileNo = " << FileNo << ", Filename = '" << Filename << "'"); +} + +void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) { +} + +void WinCOFFStreamer::Finish() { + MCObjectStreamer::Finish(); +} + +namespace llvm +{ + MCStreamer *createWinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) { + return new WinCOFFStreamer(Context, TAB, CE, OS); + } +} diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index f1347f9..366d2f7 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMSupport ConstantRange.cpp Debug.cpp DeltaAlgorithm.cpp + DAGDeltaAlgorithm.cpp Dwarf.cpp ErrorHandling.cpp FileUtilities.cpp diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp new file mode 100644 index 0000000..8145664 --- /dev/null +++ b/lib/Support/DAGDeltaAlgorithm.cpp @@ -0,0 +1,357 @@ +//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// The algorithm we use attempts to exploit the dependency information by +// minimizing top-down. We start by constructing an initial root set R, and +// then iteratively: +// +// 1. Minimize the set R using the test predicate: +// P'(S) = P(S union pred*(S)) +// +// 2. Extend R to R' = R union pred(R). +// +// until a fixed point is reached. +// +// The idea is that we want to quickly prune entire portions of the graph, so we +// try to find high-level nodes that can be eliminated with all of their +// dependents. +// +// FIXME: The current algorithm doesn't actually provide a strong guarantee +// about the minimality of the result. The problem is that after adding nodes to +// the required set, we no longer consider them for elimination. For strictly +// well-formed predicates, this doesn't happen, but it commonly occurs in +// practice when there are unmodelled dependencies. I believe we can resolve +// this by allowing the required set to be minimized as well, but need more test +// cases first. 
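Aside: the minimize/extend loop in this header comment is easiest to see on a tiny graph. The sketch below exercises the public interface as this patch defines it, assuming the header exposes change_ty/changeset_ty/edge_ty as the implementation uses them, that an edge (A, B) is read as "B depends on A", and that ExecuteOneTest is the only pure virtual hook. With changes {1, 2, 3}, edges (1, 2) and (2, 3), and a predicate that passes exactly when change 3 is present, the root set is {3}, the extended test P({3} union pred*(3)) = P({1, 2, 3}) passes, and hand-tracing Run() (with the underlying DeltaAlgorithm checking the empty set first) converges to {3}:

    #include "llvm/ADT/DAGDeltaAlgorithm.h"
    #include <utility>
    #include <vector>

    using namespace llvm;

    namespace {
    // Predicate: the pred-closure-extended change set must contain change 3.
    struct ContainsThree : DAGDeltaAlgorithm {
      virtual bool ExecuteOneTest(const changeset_ty &S) {
        return S.count(3) != 0;
      }
    };
    }

    int main() {
      ContainsThree DDA;
      DAGDeltaAlgorithm::changeset_ty Changes;
      for (unsigned i = 1; i <= 3; ++i)
        Changes.insert(i);

      std::vector<DAGDeltaAlgorithm::edge_ty> Deps;
      Deps.push_back(std::make_pair(1u, 2u)); // change 2 depends on change 1
      Deps.push_back(std::make_pair(2u, 3u)); // change 3 depends on change 2

      DAGDeltaAlgorithm::changeset_ty Min = DDA.Run(Changes, Deps);
      // The client reconstructs the failing configuration as
      // Min union pred*(Min) == {1, 2, 3}.
      return (Min.size() == 1 && Min.count(3)) ? 0 : 1;
    }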
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DAGDeltaAlgorithm.h" +#include "llvm/ADT/DeltaAlgorithm.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <map> +using namespace llvm; + +namespace { + +class DAGDeltaAlgorithmImpl { + friend class DeltaActiveSetHelper; + +public: + typedef DAGDeltaAlgorithm::change_ty change_ty; + typedef DAGDeltaAlgorithm::changeset_ty changeset_ty; + typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty; + typedef DAGDeltaAlgorithm::edge_ty edge_ty; + +private: + typedef std::vector<change_ty>::iterator pred_iterator_ty; + typedef std::vector<change_ty>::iterator succ_iterator_ty; + typedef std::set<change_ty>::iterator pred_closure_iterator_ty; + typedef std::set<change_ty>::iterator succ_closure_iterator_ty; + + DAGDeltaAlgorithm &DDA; + + const changeset_ty &Changes; + const std::vector<edge_ty> &Dependencies; + + std::vector<change_ty> Roots; + + /// Cache of failed test results. Successful test results are never cached + /// since we always reduce following a success. We maintain an independent + /// cache from that used by the individual delta passes because we may get + /// hits across multiple individual delta invocations. + mutable std::set<changeset_ty> FailedTestsCache; + + // FIXME: Gross. + std::map<change_ty, std::vector<change_ty> > Predecessors; + std::map<change_ty, std::vector<change_ty> > Successors; + + std::map<change_ty, std::set<change_ty> > PredClosure; + std::map<change_ty, std::set<change_ty> > SuccClosure; + +private: + pred_iterator_ty pred_begin(change_ty Node) { + assert(Predecessors.count(Node) && "Invalid node!"); + return Predecessors[Node].begin(); + } + pred_iterator_ty pred_end(change_ty Node) { + assert(Predecessors.count(Node) && "Invalid node!"); + return Predecessors[Node].end(); + } + + pred_closure_iterator_ty pred_closure_begin(change_ty Node) { + assert(PredClosure.count(Node) && "Invalid node!"); + return PredClosure[Node].begin(); + } + pred_closure_iterator_ty pred_closure_end(change_ty Node) { + assert(PredClosure.count(Node) && "Invalid node!"); + return PredClosure[Node].end(); + } + + succ_iterator_ty succ_begin(change_ty Node) { + assert(Successors.count(Node) && "Invalid node!"); + return Successors[Node].begin(); + } + succ_iterator_ty succ_end(change_ty Node) { + assert(Successors.count(Node) && "Invalid node!"); + return Successors[Node].end(); + } + + succ_closure_iterator_ty succ_closure_begin(change_ty Node) { + assert(SuccClosure.count(Node) && "Invalid node!"); + return SuccClosure[Node].begin(); + } + succ_closure_iterator_ty succ_closure_end(change_ty Node) { + assert(SuccClosure.count(Node) && "Invalid node!"); + return SuccClosure[Node].end(); + } + + void UpdatedSearchState(const changeset_ty &Changes, + const changesetlist_ty &Sets, + const changeset_ty &Required) { + DDA.UpdatedSearchState(Changes, Sets, Required); + } + + /// ExecuteOneTest - Execute a single test predicate on the change set \arg S. + bool ExecuteOneTest(const changeset_ty &S) { + // Check dependencies invariant. 
+ DEBUG({ + for (changeset_ty::const_iterator it = S.begin(), + ie = S.end(); it != ie; ++it) + for (succ_iterator_ty it2 = succ_begin(*it), + ie2 = succ_end(*it); it2 != ie2; ++it2) + assert(S.count(*it2) && "Attempt to run invalid changeset!"); + }); + + return DDA.ExecuteOneTest(S); + } + +public: + DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA, + const changeset_ty &_Changes, + const std::vector<edge_ty> &_Dependencies); + + changeset_ty Run(); + + /// GetTestResult - Get the test result for the active set \arg Changes with + /// \arg Required changes from the cache, executing the test if necessary. + /// + /// \param Changes - The set of active changes being minimized, which should + /// have their pred closure included in the test. + /// \param Required - The set of changes which have previously been + /// established to be required. + /// \return - The test result. + bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required); +}; + +/// Helper object for minimizing an active set of changes. +class DeltaActiveSetHelper : public DeltaAlgorithm { + DAGDeltaAlgorithmImpl &DDAI; + + const changeset_ty &Required; + +protected: + /// UpdatedSearchState - Callback used when the search state changes. + virtual void UpdatedSearchState(const changeset_ty &Changes, + const changesetlist_ty &Sets) { + DDAI.UpdatedSearchState(Changes, Sets, Required); + } + + virtual bool ExecuteOneTest(const changeset_ty &S) { + return DDAI.GetTestResult(S, Required); + } + +public: + DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI, + const changeset_ty &_Required) + : DDAI(_DDAI), Required(_Required) {} +}; + +} + +DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA, + const changeset_ty &_Changes, + const std::vector<edge_ty> + &_Dependencies) + : DDA(_DDA), + Changes(_Changes), + Dependencies(_Dependencies) +{ + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + Predecessors.insert(std::make_pair(*it, std::vector<change_ty>())); + Successors.insert(std::make_pair(*it, std::vector<change_ty>())); + } + for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(), + ie = Dependencies.end(); it != ie; ++it) { + Predecessors[it->second].push_back(it->first); + Successors[it->first].push_back(it->second); + } + + // Compute the roots. + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + if (succ_begin(*it) == succ_end(*it)) + Roots.push_back(*it); + + // Pre-compute the closure of the successor relation. + std::vector<change_ty> Worklist(Roots.begin(), Roots.end()); + while (!Worklist.empty()) { + change_ty Change = Worklist.back(); + Worklist.pop_back(); + + std::set<change_ty> &ChangeSuccs = SuccClosure[Change]; + for (pred_iterator_ty it = pred_begin(Change), + ie = pred_end(Change); it != ie; ++it) { + SuccClosure[*it].insert(Change); + SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end()); + Worklist.push_back(*it); + } + } + + // Invert to form the predecessor closure map. + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + PredClosure.insert(std::make_pair(*it, std::set<change_ty>())); + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + for (succ_closure_iterator_ty it2 = succ_closure_begin(*it), + ie2 = succ_closure_end(*it); it2 != ie2; ++it2) + PredClosure[*it2].insert(*it); + + // Dump useful debug info. 
+ DEBUG({ + llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n"; + llvm::errs() << "Changes: ["; + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + if (it != Changes.begin()) llvm::errs() << ", "; + llvm::errs() << *it; + + if (succ_begin(*it) != succ_end(*it)) { + llvm::errs() << "("; + for (succ_iterator_ty it2 = succ_begin(*it), + ie2 = succ_end(*it); it2 != ie2; ++it2) { + if (it2 != succ_begin(*it)) llvm::errs() << ", "; + llvm::errs() << "->" << *it2; + } + llvm::errs() << ")"; + } + } + llvm::errs() << "]\n"; + + llvm::errs() << "Roots: ["; + for (std::vector<change_ty>::const_iterator it = Roots.begin(), + ie = Roots.end(); it != ie; ++it) { + if (it != Roots.begin()) llvm::errs() << ", "; + llvm::errs() << *it; + } + llvm::errs() << "]\n"; + + llvm::errs() << "Predecessor Closure:\n"; + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + llvm::errs() << format(" %-4d: [", *it); + for (pred_closure_iterator_ty it2 = pred_closure_begin(*it), + ie2 = pred_closure_end(*it); it2 != ie2; ++it2) { + if (it2 != pred_closure_begin(*it)) llvm::errs() << ", "; + llvm::errs() << *it2; + } + llvm::errs() << "]\n"; + } + + llvm::errs() << "Successor Closure:\n"; + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + llvm::errs() << format(" %-4d: [", *it); + for (succ_closure_iterator_ty it2 = succ_closure_begin(*it), + ie2 = succ_closure_end(*it); it2 != ie2; ++it2) { + if (it2 != succ_closure_begin(*it)) llvm::errs() << ", "; + llvm::errs() << *it2; + } + llvm::errs() << "]\n"; + } + + llvm::errs() << "\n\n"; + }); +} + +bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes, + const changeset_ty &Required) { + changeset_ty Extended(Required); + Extended.insert(Changes.begin(), Changes.end()); + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + Extended.insert(pred_closure_begin(*it), pred_closure_end(*it)); + + if (FailedTestsCache.count(Extended)) + return false; + + bool Result = ExecuteOneTest(Extended); + if (!Result) + FailedTestsCache.insert(Extended); + + return Result; +} + +DAGDeltaAlgorithm::changeset_ty +DAGDeltaAlgorithmImpl::Run() { + // The current set of changes we are minimizing, starting at the roots. + changeset_ty CurrentSet(Roots.begin(), Roots.end()); + + // The set of required changes. + changeset_ty Required; + + // Iterate until the active set of changes is empty. Convergence is guaranteed + // assuming input was a DAG. + // + // Invariant: CurrentSet intersect Required == {} + // Invariant: Required == (Required union succ*(Required)) + while (!CurrentSet.empty()) { + DEBUG({ + llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, " + << Required.size() << " required changes\n"; + }); + + // Minimize the current set of changes. + DeltaActiveSetHelper Helper(*this, Required); + changeset_ty CurrentMinSet = Helper.Run(CurrentSet); + + // Update the set of required changes. Since + // CurrentMinSet subset CurrentSet + // and after the last iteration, + // succ(CurrentSet) subset Required + // then + // succ(CurrentMinSet) subset Required + // and our invariant on Required is maintained. + Required.insert(CurrentMinSet.begin(), CurrentMinSet.end()); + + // Replace the current set with the predecessors of the minimized set of + // active changes. 
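Aside, before Run() wraps up below: a couple of hunks further down, DeltaAlgorithm::Split is also revised, from parity interleaving to contiguous halves. A standalone sketch of the new behavior on the set {1..6}, mirroring the typedefs the real class uses:

    #include <cassert>
    #include <set>

    typedef std::set<unsigned> changeset_ty;

    // Mirrors the revised Split: the first half of the iteration order goes
    // to LHS, the remainder to RHS (the old code alternated by index parity,
    // yielding {2,4,6} / {1,3,5} for this input).
    static void split(const changeset_ty &S, changeset_ty &LHS,
                      changeset_ty &RHS) {
      unsigned idx = 0, N = S.size() / 2;
      for (changeset_ty::const_iterator it = S.begin(), ie = S.end();
           it != ie; ++it, ++idx)
        ((idx < N) ? LHS : RHS).insert(*it);
    }

    int main() {
      changeset_ty S, L, R;
      for (unsigned i = 1; i <= 6; ++i)
        S.insert(i);
      split(S, L, R);
      assert(L.count(1) && L.count(3) && !L.count(4)); // L == {1,2,3}
      return 0;
    }

Presumably the contiguous split keeps adjacent changes together, which tends to converge faster when failures are caused by runs of related changes.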
+ CurrentSet.clear(); + for (changeset_ty::const_iterator it = CurrentMinSet.begin(), + ie = CurrentMinSet.end(); it != ie; ++it) + CurrentSet.insert(pred_begin(*it), pred_end(*it)); + + // FIXME: We could enforce CurrentSet intersect Required == {} here if we + // wanted to protect against cyclic graphs. + } + + return Required; +} + +DAGDeltaAlgorithm::changeset_ty +DAGDeltaAlgorithm::Run(const changeset_ty &Changes, + const std::vector<edge_ty> &Dependencies) { + return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run(); +} diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp index d176548..9e52874 100644 --- a/lib/Support/DeltaAlgorithm.cpp +++ b/lib/Support/DeltaAlgorithm.cpp @@ -30,10 +30,10 @@ void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) { // FIXME: This is really slow. changeset_ty LHS, RHS; - unsigned idx = 0; + unsigned idx = 0, N = S.size() / 2; for (changeset_ty::const_iterator it = S.begin(), ie = S.end(); it != ie; ++it, ++idx) - ((idx & 1) ? LHS : RHS).insert(*it); + ((idx < N) ? LHS : RHS).insert(*it); if (!LHS.empty()) Res.push_back(LHS); if (!RHS.empty()) diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index c19c2d6..96ce9d3 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -86,8 +86,8 @@ const char *llvm::dwarf::TagString(unsigned Tag) { /// const char *llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { - case DW_CHILDREN_no: return "CHILDREN_no"; - case DW_CHILDREN_yes: return "CHILDREN_yes"; + case DW_CHILDREN_no: return "DW_CHILDREN_no"; + case DW_CHILDREN_yes: return "DW_CHILDREN_yes"; } return 0; } @@ -207,27 +207,27 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { /// const char *llvm::dwarf::FormEncodingString(unsigned Encoding) { switch (Encoding) { - case DW_FORM_addr: return "FORM_addr"; - case DW_FORM_block2: return "FORM_block2"; - case DW_FORM_block4: return "FORM_block4"; - case DW_FORM_data2: return "FORM_data2"; - case DW_FORM_data4: return "FORM_data4"; - case DW_FORM_data8: return "FORM_data8"; - case DW_FORM_string: return "FORM_string"; - case DW_FORM_block: return "FORM_block"; - case DW_FORM_block1: return "FORM_block1"; - case DW_FORM_data1: return "FORM_data1"; - case DW_FORM_flag: return "FORM_flag"; - case DW_FORM_sdata: return "FORM_sdata"; - case DW_FORM_strp: return "FORM_strp"; - case DW_FORM_udata: return "FORM_udata"; - case DW_FORM_ref_addr: return "FORM_ref_addr"; - case DW_FORM_ref1: return "FORM_ref1"; - case DW_FORM_ref2: return "FORM_ref2"; - case DW_FORM_ref4: return "FORM_ref4"; - case DW_FORM_ref8: return "FORM_ref8"; - case DW_FORM_ref_udata: return "FORM_ref_udata"; - case DW_FORM_indirect: return "FORM_indirect"; + case DW_FORM_addr: return "DW_FORM_addr"; + case DW_FORM_block2: return "DW_FORM_block2"; + case DW_FORM_block4: return "DW_FORM_block4"; + case DW_FORM_data2: return "DW_FORM_data2"; + case DW_FORM_data4: return "DW_FORM_data4"; + case DW_FORM_data8: return "DW_FORM_data8"; + case DW_FORM_string: return "DW_FORM_string"; + case DW_FORM_block: return "DW_FORM_block"; + case DW_FORM_block1: return "DW_FORM_block1"; + case DW_FORM_data1: return "DW_FORM_data1"; + case DW_FORM_flag: return "DW_FORM_flag"; + case DW_FORM_sdata: return "DW_FORM_sdata"; + case DW_FORM_strp: return "DW_FORM_strp"; + case DW_FORM_udata: return "DW_FORM_udata"; + case DW_FORM_ref_addr: return "DW_FORM_ref_addr"; + case DW_FORM_ref1: return "DW_FORM_ref1"; + case DW_FORM_ref2: return "DW_FORM_ref2"; + case DW_FORM_ref4: 
return "DW_FORM_ref4"; + case DW_FORM_ref8: return "DW_FORM_ref8"; + case DW_FORM_ref_udata: return "DW_FORM_ref_udata"; + case DW_FORM_indirect: return "DW_FORM_indirect"; } return 0; } @@ -236,72 +236,159 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) { /// encoding. const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { switch (Encoding) { - case DW_OP_addr: return "OP_addr"; - case DW_OP_deref: return "OP_deref"; - case DW_OP_const1u: return "OP_const1u"; - case DW_OP_const1s: return "OP_const1s"; - case DW_OP_const2u: return "OP_const2u"; - case DW_OP_const2s: return "OP_const2s"; - case DW_OP_const4u: return "OP_const4u"; - case DW_OP_const4s: return "OP_const4s"; - case DW_OP_const8u: return "OP_const8u"; - case DW_OP_const8s: return "OP_const8s"; - case DW_OP_constu: return "OP_constu"; - case DW_OP_consts: return "OP_consts"; - case DW_OP_dup: return "OP_dup"; - case DW_OP_drop: return "OP_drop"; - case DW_OP_over: return "OP_over"; - case DW_OP_pick: return "OP_pick"; - case DW_OP_swap: return "OP_swap"; - case DW_OP_rot: return "OP_rot"; - case DW_OP_xderef: return "OP_xderef"; - case DW_OP_abs: return "OP_abs"; - case DW_OP_and: return "OP_and"; - case DW_OP_div: return "OP_div"; - case DW_OP_minus: return "OP_minus"; - case DW_OP_mod: return "OP_mod"; - case DW_OP_mul: return "OP_mul"; - case DW_OP_neg: return "OP_neg"; - case DW_OP_not: return "OP_not"; - case DW_OP_or: return "OP_or"; - case DW_OP_plus: return "OP_plus"; - case DW_OP_plus_uconst: return "OP_plus_uconst"; - case DW_OP_shl: return "OP_shl"; - case DW_OP_shr: return "OP_shr"; - case DW_OP_shra: return "OP_shra"; - case DW_OP_xor: return "OP_xor"; - case DW_OP_skip: return "OP_skip"; - case DW_OP_bra: return "OP_bra"; - case DW_OP_eq: return "OP_eq"; - case DW_OP_ge: return "OP_ge"; - case DW_OP_gt: return "OP_gt"; - case DW_OP_le: return "OP_le"; - case DW_OP_lt: return "OP_lt"; - case DW_OP_ne: return "OP_ne"; - case DW_OP_lit0: return "OP_lit0"; - case DW_OP_lit1: return "OP_lit1"; - case DW_OP_lit31: return "OP_lit31"; - case DW_OP_reg0: return "OP_reg0"; - case DW_OP_reg1: return "OP_reg1"; - case DW_OP_reg31: return "OP_reg31"; - case DW_OP_breg0: return "OP_breg0"; - case DW_OP_breg1: return "OP_breg1"; - case DW_OP_breg31: return "OP_breg31"; - case DW_OP_regx: return "OP_regx"; - case DW_OP_fbreg: return "OP_fbreg"; - case DW_OP_bregx: return "OP_bregx"; - case DW_OP_piece: return "OP_piece"; - case DW_OP_deref_size: return "OP_deref_size"; - case DW_OP_xderef_size: return "OP_xderef_size"; - case DW_OP_nop: return "OP_nop"; - case DW_OP_push_object_address: return "OP_push_object_address"; - case DW_OP_call2: return "OP_call2"; - case DW_OP_call4: return "OP_call4"; - case DW_OP_call_ref: return "OP_call_ref"; - case DW_OP_form_tls_address: return "OP_form_tls_address"; - case DW_OP_call_frame_cfa: return "OP_call_frame_cfa"; - case DW_OP_lo_user: return "OP_lo_user"; - case DW_OP_hi_user: return "OP_hi_user"; + case DW_OP_addr: return "DW_OP_addr"; + case DW_OP_deref: return "DW_OP_deref"; + case DW_OP_const1u: return "DW_OP_const1u"; + case DW_OP_const1s: return "DW_OP_const1s"; + case DW_OP_const2u: return "DW_OP_const2u"; + case DW_OP_const2s: return "DW_OP_const2s"; + case DW_OP_const4u: return "DW_OP_const4u"; + case DW_OP_const4s: return "DW_OP_const4s"; + case DW_OP_const8u: return "DW_OP_const8u"; + case DW_OP_const8s: return "DW_OP_const8s"; + case DW_OP_constu: return "DW_OP_constu"; + case DW_OP_consts: return "DW_OP_consts"; + case DW_OP_dup: return 
"DW_OP_dup"; + case DW_OP_drop: return "DW_OP_drop"; + case DW_OP_over: return "DW_OP_over"; + case DW_OP_pick: return "DW_OP_pick"; + case DW_OP_swap: return "DW_OP_swap"; + case DW_OP_rot: return "DW_OP_rot"; + case DW_OP_xderef: return "DW_OP_xderef"; + case DW_OP_abs: return "DW_OP_abs"; + case DW_OP_and: return "DW_OP_and"; + case DW_OP_div: return "DW_OP_div"; + case DW_OP_minus: return "DW_OP_minus"; + case DW_OP_mod: return "DW_OP_mod"; + case DW_OP_mul: return "DW_OP_mul"; + case DW_OP_neg: return "DW_OP_neg"; + case DW_OP_not: return "DW_OP_not"; + case DW_OP_or: return "DW_OP_or"; + case DW_OP_plus: return "DW_OP_plus"; + case DW_OP_plus_uconst: return "DW_OP_plus_uconst"; + case DW_OP_shl: return "DW_OP_shl"; + case DW_OP_shr: return "DW_OP_shr"; + case DW_OP_shra: return "DW_OP_shra"; + case DW_OP_xor: return "DW_OP_xor"; + case DW_OP_skip: return "DW_OP_skip"; + case DW_OP_bra: return "DW_OP_bra"; + case DW_OP_eq: return "DW_OP_eq"; + case DW_OP_ge: return "DW_OP_ge"; + case DW_OP_gt: return "DW_OP_gt"; + case DW_OP_le: return "DW_OP_le"; + case DW_OP_lt: return "DW_OP_lt"; + case DW_OP_ne: return "DW_OP_ne"; + case DW_OP_lit0: return "DW_OP_lit0"; + case DW_OP_lit1: return "DW_OP_lit1"; + case DW_OP_lit2: return "DW_OP_lit2"; + case DW_OP_lit3: return "DW_OP_lit3"; + case DW_OP_lit4: return "DW_OP_lit4"; + case DW_OP_lit5: return "DW_OP_lit5"; + case DW_OP_lit6: return "DW_OP_lit6"; + case DW_OP_lit7: return "DW_OP_lit7"; + case DW_OP_lit8: return "DW_OP_lit8"; + case DW_OP_lit9: return "DW_OP_lit9"; + case DW_OP_lit10: return "DW_OP_lit10"; + case DW_OP_lit11: return "DW_OP_lit11"; + case DW_OP_lit12: return "DW_OP_lit12"; + case DW_OP_lit13: return "DW_OP_lit13"; + case DW_OP_lit14: return "DW_OP_lit14"; + case DW_OP_lit15: return "DW_OP_lit15"; + case DW_OP_lit16: return "DW_OP_lit16"; + case DW_OP_lit17: return "DW_OP_lit17"; + case DW_OP_lit18: return "DW_OP_lit18"; + case DW_OP_lit19: return "DW_OP_lit19"; + case DW_OP_lit20: return "DW_OP_lit20"; + case DW_OP_lit21: return "DW_OP_lit21"; + case DW_OP_lit22: return "DW_OP_lit22"; + case DW_OP_lit23: return "DW_OP_lit23"; + case DW_OP_lit24: return "DW_OP_lit24"; + case DW_OP_lit25: return "DW_OP_lit25"; + case DW_OP_lit26: return "DW_OP_lit26"; + case DW_OP_lit27: return "DW_OP_lit27"; + case DW_OP_lit28: return "DW_OP_lit28"; + case DW_OP_lit29: return "DW_OP_lit29"; + case DW_OP_lit30: return "DW_OP_lit30"; + case DW_OP_lit31: return "DW_OP_lit31"; + case DW_OP_reg0: return "DW_OP_reg0"; + case DW_OP_reg1: return "DW_OP_reg1"; + case DW_OP_reg2: return "DW_OP_reg2"; + case DW_OP_reg3: return "DW_OP_reg3"; + case DW_OP_reg4: return "DW_OP_reg4"; + case DW_OP_reg5: return "DW_OP_reg5"; + case DW_OP_reg6: return "DW_OP_reg6"; + case DW_OP_reg7: return "DW_OP_reg7"; + case DW_OP_reg8: return "DW_OP_reg8"; + case DW_OP_reg9: return "DW_OP_reg9"; + case DW_OP_reg10: return "DW_OP_reg10"; + case DW_OP_reg11: return "DW_OP_reg11"; + case DW_OP_reg12: return "DW_OP_reg12"; + case DW_OP_reg13: return "DW_OP_reg13"; + case DW_OP_reg14: return "DW_OP_reg14"; + case DW_OP_reg15: return "DW_OP_reg15"; + case DW_OP_reg16: return "DW_OP_reg16"; + case DW_OP_reg17: return "DW_OP_reg17"; + case DW_OP_reg18: return "DW_OP_reg18"; + case DW_OP_reg19: return "DW_OP_reg19"; + case DW_OP_reg20: return "DW_OP_reg20"; + case DW_OP_reg21: return "DW_OP_reg21"; + case DW_OP_reg22: return "DW_OP_reg22"; + case DW_OP_reg23: return "DW_OP_reg23"; + case DW_OP_reg24: return "DW_OP_reg24"; + case DW_OP_reg25: return "DW_OP_reg25"; + case 
DW_OP_reg26: return "DW_OP_reg26"; + case DW_OP_reg27: return "DW_OP_reg27"; + case DW_OP_reg28: return "DW_OP_reg28"; + case DW_OP_reg29: return "DW_OP_reg29"; + case DW_OP_reg30: return "DW_OP_reg30"; + case DW_OP_reg31: return "DW_OP_reg31"; + case DW_OP_breg0: return "DW_OP_breg0"; + case DW_OP_breg1: return "DW_OP_breg1"; + case DW_OP_breg2: return "DW_OP_breg2"; + case DW_OP_breg3: return "DW_OP_breg3"; + case DW_OP_breg4: return "DW_OP_breg4"; + case DW_OP_breg5: return "DW_OP_breg5"; + case DW_OP_breg6: return "DW_OP_breg6"; + case DW_OP_breg7: return "DW_OP_breg7"; + case DW_OP_breg8: return "DW_OP_breg8"; + case DW_OP_breg9: return "DW_OP_breg9"; + case DW_OP_breg10: return "DW_OP_breg10"; + case DW_OP_breg11: return "DW_OP_breg11"; + case DW_OP_breg12: return "DW_OP_breg12"; + case DW_OP_breg13: return "DW_OP_breg13"; + case DW_OP_breg14: return "DW_OP_breg14"; + case DW_OP_breg15: return "DW_OP_breg15"; + case DW_OP_breg16: return "DW_OP_breg16"; + case DW_OP_breg17: return "DW_OP_breg17"; + case DW_OP_breg18: return "DW_OP_breg18"; + case DW_OP_breg19: return "DW_OP_breg19"; + case DW_OP_breg20: return "DW_OP_breg20"; + case DW_OP_breg21: return "DW_OP_breg21"; + case DW_OP_breg22: return "DW_OP_breg22"; + case DW_OP_breg23: return "DW_OP_breg23"; + case DW_OP_breg24: return "DW_OP_breg24"; + case DW_OP_breg25: return "DW_OP_breg25"; + case DW_OP_breg26: return "DW_OP_breg26"; + case DW_OP_breg27: return "DW_OP_breg27"; + case DW_OP_breg28: return "DW_OP_breg28"; + case DW_OP_breg29: return "DW_OP_breg29"; + case DW_OP_breg30: return "DW_OP_breg30"; + case DW_OP_breg31: return "DW_OP_breg31"; + case DW_OP_regx: return "DW_OP_regx"; + case DW_OP_fbreg: return "DW_OP_fbreg"; + case DW_OP_bregx: return "DW_OP_bregx"; + case DW_OP_piece: return "DW_OP_piece"; + case DW_OP_deref_size: return "DW_OP_deref_size"; + case DW_OP_xderef_size: return "DW_OP_xderef_size"; + case DW_OP_nop: return "DW_OP_nop"; + case DW_OP_push_object_address: return "DW_OP_push_object_address"; + case DW_OP_call2: return "DW_OP_call2"; + case DW_OP_call4: return "DW_OP_call4"; + case DW_OP_call_ref: return "DW_OP_call_ref"; + case DW_OP_form_tls_address: return "DW_OP_form_tls_address"; + case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa"; + case DW_OP_lo_user: return "DW_OP_lo_user"; + case DW_OP_hi_user: return "DW_OP_hi_user"; } return 0; } @@ -310,23 +397,23 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { /// encoding. 
const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) { switch (Encoding) { - case DW_ATE_address: return "ATE_address"; - case DW_ATE_boolean: return "ATE_boolean"; - case DW_ATE_complex_float: return "ATE_complex_float"; - case DW_ATE_float: return "ATE_float"; - case DW_ATE_signed: return "ATE_signed"; - case DW_ATE_signed_char: return "ATE_signed_char"; - case DW_ATE_unsigned: return "ATE_unsigned"; - case DW_ATE_unsigned_char: return "ATE_unsigned_char"; - case DW_ATE_imaginary_float: return "ATE_imaginary_float"; - case DW_ATE_packed_decimal: return "ATE_packed_decimal"; - case DW_ATE_numeric_string: return "ATE_numeric_string"; - case DW_ATE_edited: return "ATE_edited"; - case DW_ATE_signed_fixed: return "ATE_signed_fixed"; - case DW_ATE_unsigned_fixed: return "ATE_unsigned_fixed"; - case DW_ATE_decimal_float: return "ATE_decimal_float"; - case DW_ATE_lo_user: return "ATE_lo_user"; - case DW_ATE_hi_user: return "ATE_hi_user"; + case DW_ATE_address: return "DW_ATE_address"; + case DW_ATE_boolean: return "DW_ATE_boolean"; + case DW_ATE_complex_float: return "DW_ATE_complex_float"; + case DW_ATE_float: return "DW_ATE_float"; + case DW_ATE_signed: return "DW_ATE_signed"; + case DW_ATE_signed_char: return "DW_ATE_signed_char"; + case DW_ATE_unsigned: return "DW_ATE_unsigned"; + case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char"; + case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float"; + case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal"; + case DW_ATE_numeric_string: return "DW_ATE_numeric_string"; + case DW_ATE_edited: return "DW_ATE_edited"; + case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed"; + case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed"; + case DW_ATE_decimal_float: return "DW_ATE_decimal_float"; + case DW_ATE_lo_user: return "DW_ATE_lo_user"; + case DW_ATE_hi_user: return "DW_ATE_hi_user"; } return 0; } @@ -335,11 +422,11 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) { /// attribute. 
const char *llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { - case DW_DS_unsigned: return "DS_unsigned"; - case DW_DS_leading_overpunch: return "DS_leading_overpunch"; - case DW_DS_trailing_overpunch: return "DS_trailing_overpunch"; - case DW_DS_leading_separate: return "DS_leading_separate"; - case DW_DS_trailing_separate: return "DS_trailing_separate"; + case DW_DS_unsigned: return "DW_DS_unsigned"; + case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch"; + case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch"; + case DW_DS_leading_separate: return "DW_DS_leading_separate"; + case DW_DS_trailing_separate: return "DW_DS_trailing_separate"; } return 0; } @@ -348,11 +435,11 @@ const char *llvm::dwarf::DecimalSignString(unsigned Sign) { /// const char *llvm::dwarf::EndianityString(unsigned Endian) { switch (Endian) { - case DW_END_default: return "END_default"; - case DW_END_big: return "END_big"; - case DW_END_little: return "END_little"; - case DW_END_lo_user: return "END_lo_user"; - case DW_END_hi_user: return "END_hi_user"; + case DW_END_default: return "DW_END_default"; + case DW_END_big: return "DW_END_big"; + case DW_END_little: return "DW_END_little"; + case DW_END_lo_user: return "DW_END_lo_user"; + case DW_END_hi_user: return "DW_END_hi_user"; } return 0; } @@ -362,9 +449,9 @@ const char *llvm::dwarf::EndianityString(unsigned Endian) { const char *llvm::dwarf::AccessibilityString(unsigned Access) { switch (Access) { // Accessibility codes - case DW_ACCESS_public: return "ACCESS_public"; - case DW_ACCESS_protected: return "ACCESS_protected"; - case DW_ACCESS_private: return "ACCESS_private"; + case DW_ACCESS_public: return "DW_ACCESS_public"; + case DW_ACCESS_protected: return "DW_ACCESS_protected"; + case DW_ACCESS_private: return "DW_ACCESS_private"; } return 0; } @@ -373,9 +460,9 @@ const char *llvm::dwarf::AccessibilityString(unsigned Access) { /// const char *llvm::dwarf::VisibilityString(unsigned Visibility) { switch (Visibility) { - case DW_VIS_local: return "VIS_local"; - case DW_VIS_exported: return "VIS_exported"; - case DW_VIS_qualified: return "VIS_qualified"; + case DW_VIS_local: return "DW_VIS_local"; + case DW_VIS_exported: return "DW_VIS_exported"; + case DW_VIS_qualified: return "DW_VIS_qualified"; } return 0; } @@ -384,9 +471,9 @@ const char *llvm::dwarf::VisibilityString(unsigned Visibility) { /// const char *llvm::dwarf::VirtualityString(unsigned Virtuality) { switch (Virtuality) { - case DW_VIRTUALITY_none: return "VIRTUALITY_none"; - case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual"; - case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual"; + case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none"; + case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual"; + case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual"; } return 0; } @@ -395,27 +482,27 @@ const char *llvm::dwarf::VirtualityString(unsigned Virtuality) { /// const char *llvm::dwarf::LanguageString(unsigned Language) { switch (Language) { - case DW_LANG_C89: return "LANG_C89"; - case DW_LANG_C: return "LANG_C"; - case DW_LANG_Ada83: return "LANG_Ada83"; - case DW_LANG_C_plus_plus: return "LANG_C_plus_plus"; - case DW_LANG_Cobol74: return "LANG_Cobol74"; - case DW_LANG_Cobol85: return "LANG_Cobol85"; - case DW_LANG_Fortran77: return "LANG_Fortran77"; - case DW_LANG_Fortran90: return "LANG_Fortran90"; - case DW_LANG_Pascal83: return "LANG_Pascal83"; - case DW_LANG_Modula2: return "LANG_Modula2"; - case DW_LANG_Java: return "LANG_Java"; - 
case DW_LANG_C99: return "LANG_C99"; - case DW_LANG_Ada95: return "LANG_Ada95"; - case DW_LANG_Fortran95: return "LANG_Fortran95"; - case DW_LANG_PLI: return "LANG_PLI"; - case DW_LANG_ObjC: return "LANG_ObjC"; - case DW_LANG_ObjC_plus_plus: return "LANG_ObjC_plus_plus"; - case DW_LANG_UPC: return "LANG_UPC"; - case DW_LANG_D: return "LANG_D"; - case DW_LANG_lo_user: return "LANG_lo_user"; - case DW_LANG_hi_user: return "LANG_hi_user"; + case DW_LANG_C89: return "DW_LANG_C89"; + case DW_LANG_C: return "DW_LANG_C"; + case DW_LANG_Ada83: return "DW_LANG_Ada83"; + case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus"; + case DW_LANG_Cobol74: return "DW_LANG_Cobol74"; + case DW_LANG_Cobol85: return "DW_LANG_Cobol85"; + case DW_LANG_Fortran77: return "DW_LANG_Fortran77"; + case DW_LANG_Fortran90: return "DW_LANG_Fortran90"; + case DW_LANG_Pascal83: return "DW_LANG_Pascal83"; + case DW_LANG_Modula2: return "DW_LANG_Modula2"; + case DW_LANG_Java: return "DW_LANG_Java"; + case DW_LANG_C99: return "DW_LANG_C99"; + case DW_LANG_Ada95: return "DW_LANG_Ada95"; + case DW_LANG_Fortran95: return "DW_LANG_Fortran95"; + case DW_LANG_PLI: return "DW_LANG_PLI"; + case DW_LANG_ObjC: return "DW_LANG_ObjC"; + case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus"; + case DW_LANG_UPC: return "DW_LANG_UPC"; + case DW_LANG_D: return "DW_LANG_D"; + case DW_LANG_lo_user: return "DW_LANG_lo_user"; + case DW_LANG_hi_user: return "DW_LANG_hi_user"; } return 0; } @@ -424,10 +511,10 @@ const char *llvm::dwarf::LanguageString(unsigned Language) { /// const char *llvm::dwarf::CaseString(unsigned Case) { switch (Case) { - case DW_ID_case_sensitive: return "ID_case_sensitive"; - case DW_ID_up_case: return "ID_up_case"; - case DW_ID_down_case: return "ID_down_case"; - case DW_ID_case_insensitive: return "ID_case_insensitive"; + case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; + case DW_ID_up_case: return "DW_ID_up_case"; + case DW_ID_down_case: return "DW_ID_down_case"; + case DW_ID_case_insensitive: return "DW_ID_case_insensitive"; } return 0; } @@ -436,11 +523,11 @@ const char *llvm::dwarf::CaseString(unsigned Case) { /// const char *llvm::dwarf::ConventionString(unsigned Convention) { switch (Convention) { - case DW_CC_normal: return "CC_normal"; - case DW_CC_program: return "CC_program"; - case DW_CC_nocall: return "CC_nocall"; - case DW_CC_lo_user: return "CC_lo_user"; - case DW_CC_hi_user: return "CC_hi_user"; + case DW_CC_normal: return "DW_CC_normal"; + case DW_CC_program: return "DW_CC_program"; + case DW_CC_nocall: return "DW_CC_nocall"; + case DW_CC_lo_user: return "DW_CC_lo_user"; + case DW_CC_hi_user: return "DW_CC_hi_user"; } return 0; } @@ -449,10 +536,10 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) { /// const char *llvm::dwarf::InlineCodeString(unsigned Code) { switch (Code) { - case DW_INL_not_inlined: return "INL_not_inlined"; - case DW_INL_inlined: return "INL_inlined"; - case DW_INL_declared_not_inlined: return "INL_declared_not_inlined"; - case DW_INL_declared_inlined: return "INL_declared_inlined"; + case DW_INL_not_inlined: return "DW_INL_not_inlined"; + case DW_INL_inlined: return "DW_INL_inlined"; + case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined"; + case DW_INL_declared_inlined: return "DW_INL_declared_inlined"; } return 0; } @@ -461,8 +548,8 @@ const char *llvm::dwarf::InlineCodeString(unsigned Code) { /// const char *llvm::dwarf::ArrayOrderString(unsigned Order) { switch (Order) { - case DW_ORD_row_major: return "ORD_row_major"; - case 
DW_ORD_col_major: return "ORD_col_major"; + case DW_ORD_row_major: return "DW_ORD_row_major"; + case DW_ORD_col_major: return "DW_ORD_col_major"; } return 0; } @@ -471,8 +558,8 @@ const char *llvm::dwarf::ArrayOrderString(unsigned Order) { /// descriptor. const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) { switch (Discriminant) { - case DW_DSC_label: return "DSC_label"; - case DW_DSC_range: return "DSC_range"; + case DW_DSC_label: return "DW_DSC_label"; + case DW_DSC_range: return "DW_DSC_range"; } return 0; } @@ -481,18 +568,18 @@ const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) { /// const char *llvm::dwarf::LNStandardString(unsigned Standard) { switch (Standard) { - case DW_LNS_copy: return "LNS_copy"; - case DW_LNS_advance_pc: return "LNS_advance_pc"; - case DW_LNS_advance_line: return "LNS_advance_line"; - case DW_LNS_set_file: return "LNS_set_file"; - case DW_LNS_set_column: return "LNS_set_column"; - case DW_LNS_negate_stmt: return "LNS_negate_stmt"; - case DW_LNS_set_basic_block: return "LNS_set_basic_block"; - case DW_LNS_const_add_pc: return "LNS_const_add_pc"; - case DW_LNS_fixed_advance_pc: return "LNS_fixed_advance_pc"; - case DW_LNS_set_prologue_end: return "LNS_set_prologue_end"; - case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin"; - case DW_LNS_set_isa: return "LNS_set_isa"; + case DW_LNS_copy: return "DW_LNS_copy"; + case DW_LNS_advance_pc: return "DW_LNS_advance_pc"; + case DW_LNS_advance_line: return "DW_LNS_advance_line"; + case DW_LNS_set_file: return "DW_LNS_set_file"; + case DW_LNS_set_column: return "DW_LNS_set_column"; + case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt"; + case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block"; + case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc"; + case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc"; + case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end"; + case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin"; + case DW_LNS_set_isa: return "DW_LNS_set_isa"; } return 0; } @@ -502,11 +589,11 @@ const char *llvm::dwarf::LNStandardString(unsigned Standard) { const char *llvm::dwarf::LNExtendedString(unsigned Encoding) { switch (Encoding) { // Line Number Extended Opcode Encodings - case DW_LNE_end_sequence: return "LNE_end_sequence"; - case DW_LNE_set_address: return "LNE_set_address"; - case DW_LNE_define_file: return "LNE_define_file"; - case DW_LNE_lo_user: return "LNE_lo_user"; - case DW_LNE_hi_user: return "LNE_hi_user"; + case DW_LNE_end_sequence: return "DW_LNE_end_sequence"; + case DW_LNE_set_address: return "DW_LNE_set_address"; + case DW_LNE_define_file: return "DW_LNE_define_file"; + case DW_LNE_lo_user: return "DW_LNE_lo_user"; + case DW_LNE_hi_user: return "DW_LNE_hi_user"; } return 0; } @@ -516,11 +603,11 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) { const char *llvm::dwarf::MacinfoString(unsigned Encoding) { switch (Encoding) { // Macinfo Type Encodings - case DW_MACINFO_define: return "MACINFO_define"; - case DW_MACINFO_undef: return "MACINFO_undef"; - case DW_MACINFO_start_file: return "MACINFO_start_file"; - case DW_MACINFO_end_file: return "MACINFO_end_file"; - case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext"; + case DW_MACINFO_define: return "DW_MACINFO_define"; + case DW_MACINFO_undef: return "DW_MACINFO_undef"; + case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; + case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; + case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; } 
return 0; } @@ -529,33 +616,33 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { /// encodings. const char *llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { - case DW_CFA_advance_loc: return "CFA_advance_loc"; - case DW_CFA_offset: return "CFA_offset"; - case DW_CFA_restore: return "CFA_restore"; - case DW_CFA_set_loc: return "CFA_set_loc"; - case DW_CFA_advance_loc1: return "CFA_advance_loc1"; - case DW_CFA_advance_loc2: return "CFA_advance_loc2"; - case DW_CFA_advance_loc4: return "CFA_advance_loc4"; - case DW_CFA_offset_extended: return "CFA_offset_extended"; - case DW_CFA_restore_extended: return "CFA_restore_extended"; - case DW_CFA_undefined: return "CFA_undefined"; - case DW_CFA_same_value: return "CFA_same_value"; - case DW_CFA_register: return "CFA_register"; - case DW_CFA_remember_state: return "CFA_remember_state"; - case DW_CFA_restore_state: return "CFA_restore_state"; - case DW_CFA_def_cfa: return "CFA_def_cfa"; - case DW_CFA_def_cfa_register: return "CFA_def_cfa_register"; - case DW_CFA_def_cfa_offset: return "CFA_def_cfa_offset"; - case DW_CFA_def_cfa_expression: return "CFA_def_cfa_expression"; - case DW_CFA_expression: return "CFA_expression"; - case DW_CFA_offset_extended_sf: return "CFA_offset_extended_sf"; - case DW_CFA_def_cfa_sf: return "CFA_def_cfa_sf"; - case DW_CFA_def_cfa_offset_sf: return "CFA_def_cfa_offset_sf"; - case DW_CFA_val_offset: return "CFA_val_offset"; - case DW_CFA_val_offset_sf: return "CFA_val_offset_sf"; - case DW_CFA_val_expression: return "CFA_val_expression"; - case DW_CFA_lo_user: return "CFA_lo_user"; - case DW_CFA_hi_user: return "CFA_hi_user"; + case DW_CFA_advance_loc: return "DW_CFA_advance_loc"; + case DW_CFA_offset: return "DW_CFA_offset"; + case DW_CFA_restore: return "DW_CFA_restore"; + case DW_CFA_set_loc: return "DW_CFA_set_loc"; + case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1"; + case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2"; + case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4"; + case DW_CFA_offset_extended: return "DW_CFA_offset_extended"; + case DW_CFA_restore_extended: return "DW_CFA_restore_extended"; + case DW_CFA_undefined: return "DW_CFA_undefined"; + case DW_CFA_same_value: return "DW_CFA_same_value"; + case DW_CFA_register: return "DW_CFA_register"; + case DW_CFA_remember_state: return "DW_CFA_remember_state"; + case DW_CFA_restore_state: return "DW_CFA_restore_state"; + case DW_CFA_def_cfa: return "DW_CFA_def_cfa"; + case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register"; + case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset"; + case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression"; + case DW_CFA_expression: return "DW_CFA_expression"; + case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf"; + case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf"; + case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf"; + case DW_CFA_val_offset: return "DW_CFA_val_offset"; + case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf"; + case DW_CFA_val_expression: return "DW_CFA_val_expression"; + case DW_CFA_lo_user: return "DW_CFA_lo_user"; + case DW_CFA_hi_user: return "DW_CFA_hi_user"; } return 0; } diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 095395f..1bde2fe 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -51,7 +51,15 @@ static const char *BackupNumber(const char *Pos, const char *FirstChar) { if (!isNumberChar(*Pos)) return Pos; // Otherwise, return to the start 
of the number. + bool HasPeriod = false; while (Pos > FirstChar && isNumberChar(Pos[-1])) { + // Back up over at most one period. + if (Pos[-1] == '.') { + if (HasPeriod) + break; + HasPeriod = true; + } + --Pos; if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1])) break; @@ -204,16 +212,16 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA, const char *F1P = File1Start; const char *F2P = File2Start; - if (A_size == B_size) { - // Are the buffers identical? Common case: Handle this efficiently. - if (std::memcmp(File1Start, File2Start, A_size) == 0) - return 0; + // Are the buffers identical? Common case: Handle this efficiently. + if (A_size == B_size && + std::memcmp(File1Start, File2Start, A_size) == 0) + return 0; - if (AbsTol == 0 && RelTol == 0) { - if (Error) - *Error = "Files differ without tolerance allowance"; - return 1; // Files different! - } + // Otherwise, we are done if no tolerances are set. + if (AbsTol == 0 && RelTol == 0) { + if (Error) + *Error = "Files differ without tolerance allowance"; + return 1; // Files different! } bool CompareFailed = false; diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 3f467fe..b8dca33 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -175,6 +175,14 @@ static void **GetBucketFor(const FoldingSetNodeID &ID, return Buckets + BucketNum; } +/// AllocateBuckets - Allocate initialized bucket memory. +static void **AllocateBuckets(unsigned NumBuckets) { + void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*))); + // Set the very last bucket to be a non-null "pointer". + Buckets[NumBuckets] = reinterpret_cast<void*>(-1); + return Buckets; +} + //===----------------------------------------------------------------------===// // FoldingSetImpl Implementation @@ -182,11 +190,11 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { assert(5 < Log2InitSize && Log2InitSize < 32 && "Initial hash table size out of range"); NumBuckets = 1 << Log2InitSize; - Buckets = new void*[NumBuckets+1]; - clear(); + Buckets = AllocateBuckets(NumBuckets); + NumNodes = 0; } FoldingSetImpl::~FoldingSetImpl() { - delete [] Buckets; + free(Buckets); } void FoldingSetImpl::clear() { // Set all but the last bucket to null pointers. @@ -207,8 +215,8 @@ void FoldingSetImpl::GrowHashTable() { NumBuckets <<= 1; // Clear out new buckets. - Buckets = new void*[NumBuckets+1]; - clear(); + Buckets = AllocateBuckets(NumBuckets); + NumNodes = 0; // Walk the old buckets, rehashing nodes into their new place. FoldingSetNodeID ID; @@ -227,7 +235,7 @@ void FoldingSetImpl::GrowHashTable() { } } - delete[] OldBuckets; + free(OldBuckets); } /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 2b95089..542162d 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/MathExtras.h" #include "llvm/System/Errno.h" #include "llvm/System/Path.h" #include "llvm/System/Process.h" @@ -37,22 +38,7 @@ using namespace llvm; // MemoryBuffer implementation itself. //===----------------------------------------------------------------------===// -MemoryBuffer::~MemoryBuffer() { - if (MustDeleteBuffer) - free((void*)BufferStart); -} - -/// initCopyOf - Initialize this source buffer with a copy of the specified -/// memory range.
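The AllocateBuckets helper above relies on two properties: calloc zero-fills, so every bucket starts out empty (null), and the extra slot at index NumBuckets holds a non-null sentinel, so a scan can find the end of the table without carrying a length. A standalone sketch of the scan that the sentinel enables (not the real iterator code):

// Advance to the next non-empty bucket. Empty buckets are null thanks to
// calloc; the reinterpret_cast<void*>(-1) sentinel at the end guarantees
// termination, so no bounds check or element count is needed.
static void **NextNonEmptyBucket(void **Bucket) {
  while (*Bucket == 0)
    ++Bucket;
  return Bucket;  // a real node chain, or the end-of-table sentinel
}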
We make the copy so that we can null terminate it -/// successfully. -void MemoryBuffer::initCopyOf(const char *BufStart, const char *BufEnd) { - size_t Size = BufEnd-BufStart; - BufferStart = (char *)malloc(Size+1); - BufferEnd = BufferStart+Size; - memcpy(const_cast<char*>(BufferStart), BufStart, Size); - *const_cast<char*>(BufferEnd) = 0; // Null terminate buffer. - MustDeleteBuffer = true; -} +MemoryBuffer::~MemoryBuffer() { } /// init - Initialize this MemoryBuffer as a reference to externally allocated /// memory, memory that we know is already null terminated. @@ -60,27 +46,38 @@ void MemoryBuffer::init(const char *BufStart, const char *BufEnd) { assert(BufEnd[0] == 0 && "Buffer is not null terminated!"); BufferStart = BufStart; BufferEnd = BufEnd; - MustDeleteBuffer = false; } //===----------------------------------------------------------------------===// // MemoryBufferMem implementation. //===----------------------------------------------------------------------===// +/// CopyStringRef - Copies contents of a StringRef into a block of memory and +/// null-terminates it. +static void CopyStringRef(char *Memory, StringRef Data) { + memcpy(Memory, Data.data(), Data.size()); + Memory[Data.size()] = 0; // Null terminate string. +} + +/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it. +template <typename T> +static T* GetNamedBuffer(StringRef Buffer, StringRef Name) { + char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1)); + CopyStringRef(Mem + sizeof(T), Name); + return new (Mem) T(Buffer); +} + namespace { +/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. class MemoryBufferMem : public MemoryBuffer { - std::string FileID; public: - MemoryBufferMem(StringRef InputData, StringRef FID, bool Copy = false) - : FileID(FID) { - if (!Copy) - init(InputData.data(), InputData.data()+InputData.size()); - else - initCopyOf(InputData.data(), InputData.data()+InputData.size()); + MemoryBufferMem(StringRef InputData) { + init(InputData.begin(), InputData.end()); } - + virtual const char *getBufferIdentifier() const { - return FileID.c_str(); + // The name is stored after the class itself. + return reinterpret_cast<const char*>(this + 1); } }; } @@ -88,42 +85,55 @@ public: /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that EndPtr[0] must be a null byte and be accessible! MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, - const char *BufferName) { - return new MemoryBufferMem(InputData, BufferName); + StringRef BufferName) { + return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, /// copying the contents and taking ownership of it. This has no requirements /// on EndPtr[0]. MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData, - const char *BufferName) { - return new MemoryBufferMem(InputData, BufferName, true); + StringRef BufferName) { + MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName); + if (!Buf) return 0; + memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(), + InputData.size()); + return Buf; } /// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size -/// that is completely initialized to zeros. Note that the caller should -/// initialize the memory allocated by this method. The memory is owned by -/// the MemoryBuffer object. +/// that is not initialized. 
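GetNamedBuffer above replaces the old std::string FileID member with a single allocation holding the object and its NUL-terminated identifier back to back, which is why getBufferIdentifier can simply return reinterpret_cast<const char*>(this + 1). The same trick in isolation, with a hypothetical NamedBlob type:

#include <cstring>
#include <new>

struct NamedBlob {
  // The identifier lives in the same allocation, directly after the object.
  const char *name() const { return reinterpret_cast<const char *>(this + 1); }
};

static NamedBlob *CreateNamedBlob(const char *Name) {
  std::size_t Len = std::strlen(Name);
  char *Mem = static_cast<char *>(operator new(sizeof(NamedBlob) + Len + 1));
  std::memcpy(Mem + sizeof(NamedBlob), Name, Len + 1);  // copy incl. the NUL
  return new (Mem) NamedBlob();  // placement-new the object at the front
}
// Destroy with: Blob->~NamedBlob(); operator delete(Blob);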
Note that the caller should initialize the +/// memory allocated by this method. The memory is owned by the MemoryBuffer +/// object. MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, StringRef BufferName) { - char *Buf = (char *)malloc(Size+1); - if (!Buf) return 0; - Buf[Size] = 0; - MemoryBufferMem *SB = new MemoryBufferMem(StringRef(Buf, Size), BufferName); - // The memory for this buffer is owned by the MemoryBuffer. - SB->MustDeleteBuffer = true; - return SB; + // Allocate space for the MemoryBuffer, the data and the name. It is important + // that MemoryBuffer and data are aligned so PointerIntPair works with them. + size_t AlignedStringLen = + RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1, + sizeof(void*)); // TODO: Is sizeof(void*) enough? + size_t RealLen = AlignedStringLen + Size + 1; + char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow)); + if (!Mem) return 0; + + // The name is stored after the class itself. + CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName); + + // The buffer begins after the name and must be aligned. + char *Buf = Mem + AlignedStringLen; + Buf[Size] = 0; // Null terminate buffer. + + return new (Mem) MemoryBufferMem(StringRef(Buf, Size)); } /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that /// is completely initialized to zeros. Note that the caller should /// initialize the memory allocated by this method. The memory is owned by /// the MemoryBuffer object. -MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, - const char *BufferName) { +MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName); if (!SB) return 0; - memset(const_cast<char*>(SB->getBufferStart()), 0, Size+1); + memset(const_cast<char*>(SB->getBufferStart()), 0, Size); return SB; } @@ -137,7 +147,16 @@ MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename, int64_t FileSize, struct stat *FileInfo) { if (Filename == "-") - return getSTDIN(); + return getSTDIN(ErrStr); + return getFile(Filename, ErrStr, FileSize, FileInfo); +} + +MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename, + std::string *ErrStr, + int64_t FileSize, + struct stat *FileInfo) { + if (strcmp(Filename, "-") == 0) + return getSTDIN(ErrStr); return getFile(Filename, ErrStr, FileSize, FileInfo); } @@ -149,18 +168,11 @@ namespace { /// MemoryBufferMMapFile - This represents a file that was mapped in with the /// sys::Path::MapInFilePages method. When destroyed, it calls the /// sys::Path::UnMapFilePages method. 
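getNewUninitMemBuffer above extends that layout to three regions in one allocation: object, name, then data, with the data offset rounded up to pointer alignment via RoundUpToAlignment (the in-code TODO notes the alignment choice may need revisiting). The offset arithmetic, reduced to a standalone sketch:

#include <cstddef>

// Layout: [ MemoryBufferMem | name '\0' | padding | data ... '\0' ]
// Returns the offset at which the data begins, rounded up to pointer
// alignment; Align must be a power of two for the mask trick to work.
static std::size_t DataOffset(std::size_t HeaderSize, std::size_t NameLen) {
  const std::size_t Align = sizeof(void *);
  std::size_t Raw = HeaderSize + NameLen + 1;   // header + name + NUL
  return (Raw + Align - 1) & ~(Align - 1);      // round up to Align
}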
-class MemoryBufferMMapFile : public MemoryBuffer { - std::string Filename; +class MemoryBufferMMapFile : public MemoryBufferMem { public: - MemoryBufferMMapFile(StringRef filename, const char *Pages, uint64_t Size) - : Filename(filename) { - init(Pages, Pages+Size); - } - - virtual const char *getBufferIdentifier() const { - return Filename.c_str(); - } - + MemoryBufferMMapFile(StringRef Buffer) + : MemoryBufferMem(Buffer) { } + ~MemoryBufferMMapFile() { sys::Path::UnMapFilePages(getBufferStart(), getBufferSize()); } @@ -170,19 +182,24 @@ public: class FileCloser { int FD; public: - FileCloser(int FD) : FD(FD) {} + explicit FileCloser(int FD) : FD(FD) {} ~FileCloser() { ::close(FD); } }; } MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, int64_t FileSize, struct stat *FileInfo) { - int OpenFlags = 0; + SmallString<256> PathBuf(Filename.begin(), Filename.end()); + return MemoryBuffer::getFile(PathBuf.c_str(), ErrStr, FileSize, FileInfo); +} + +MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr, + int64_t FileSize, struct stat *FileInfo) { + int OpenFlags = O_RDONLY; #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. #endif - SmallString<256> PathBuf(Filename.begin(), Filename.end()); - int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags); + int FD = ::open(Filename, OpenFlags); if (FD == -1) { if (ErrStr) *ErrStr = sys::StrError(); return 0; @@ -213,8 +230,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, if (FileSize >= 4096*4 && (FileSize & (sys::Process::GetPageSize()-1)) != 0) { if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) { - // Close the file descriptor, now that the whole file is in memory. - return new MemoryBufferMMapFile(Filename, Pages, FileSize); + return GetNamedBuffer<MemoryBufferMMapFile>(StringRef(Pages, FileSize), + Filename); } } @@ -254,34 +271,27 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, // MemoryBuffer::getSTDIN implementation. //===----------------------------------------------------------------------===// -namespace { -class STDINBufferFile : public MemoryBuffer { -public: - virtual const char *getBufferIdentifier() const { - return "<stdin>"; - } -}; -} - -MemoryBuffer *MemoryBuffer::getSTDIN() { - char Buffer[4096*4]; - - std::vector<char> FileData; - +MemoryBuffer *MemoryBuffer::getSTDIN(std::string *ErrStr) { // Read in all of the data from stdin, we cannot mmap stdin. // // FIXME: That isn't necessarily true, we should try to mmap stdin and // fallback if it fails. sys::Program::ChangeStdinToBinary(); - size_t ReadBytes; + + const ssize_t ChunkSize = 4096*4; + SmallString<ChunkSize> Buffer; + ssize_t ReadBytes; + // Read into Buffer until we hit EOF. do { - ReadBytes = fread(Buffer, sizeof(char), sizeof(Buffer), stdin); - FileData.insert(FileData.end(), Buffer, Buffer+ReadBytes); - } while (ReadBytes == sizeof(Buffer)); - - FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end(). 
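The getFile path above maps a file only when it is at least 16 KiB and its size is not an exact multiple of the page size; the second condition appears to be what keeps the mapped buffer null-terminated for free, since the zero fill past EOF on the final page supplies the terminator. The decision as a standalone predicate:

#include <stdint.h>

// Mirrors the heuristic above: large enough for mmap to pay off, and not
// an exact number of pages, so the tail of the last mapped page is zero
// and acts as the required trailing NUL. PageSize must be a power of two.
static bool ShouldMapFile(uint64_t FileSize, uint64_t PageSize) {
  return FileSize >= 4096 * 4 && (FileSize & (PageSize - 1)) != 0;
}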
- size_t Size = FileData.size(); - MemoryBuffer *B = new STDINBufferFile(); - B->initCopyOf(&FileData[0], &FileData[Size-1]); - return B; + Buffer.reserve(Buffer.size() + ChunkSize); + ReadBytes = read(0, Buffer.end(), ChunkSize); + if (ReadBytes == -1) { + if (errno == EINTR) continue; + if (ErrStr) *ErrStr = sys::StrError(); + return 0; + } + Buffer.set_size(Buffer.size() + ReadBytes); + } while (ReadBytes != 0); + + return getMemBufferCopy(Buffer, "<stdin>"); } diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index 7a04a53..a99ab2f 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -12,11 +12,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Signals.h" #include "llvm/System/ThreadLocal.h" #include "llvm/ADT/SmallString.h" + +#ifdef HAVE_CRASHREPORTERCLIENT_H +#include <CrashReporterClient.h> +#endif + using namespace llvm; namespace llvm { @@ -48,8 +54,17 @@ static void PrintCurStackTrace(raw_ostream &OS) { OS.flush(); } -// Integrate with crash reporter. -#ifdef __APPLE__ +// Integrate with crash reporter libraries. +#if defined (__APPLE__) && defined (HAVE_CRASHREPORTERCLIENT_H) +// If any clients of llvm try to link to libCrashReporterClient.a themselves, +// only one crash info struct will be used. +extern "C" { +CRASH_REPORTER_CLIENT_HIDDEN +struct crashreporter_annotations_t gCRAnnotations + __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION))) + = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 }; +} +#elif defined (__APPLE__) static const char *__crashreporter_info__ = 0; asm(".desc ___crashreporter_info__, 0x10"); #endif @@ -71,7 +86,11 @@ static void CrashHandler(void *Cookie) { } if (!TmpStr.empty()) { +#ifndef HAVE_CRASHREPORTERCLIENT_H __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str()); +#else + CRSetCrashLogMessage(std::string(TmpStr.str()).c_str()); +#endif errs() << TmpStr.str(); } diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 68938fa..504e649 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -166,10 +166,13 @@ void SmallPtrSetImpl::Grow() { } } -SmallPtrSetImpl::SmallPtrSetImpl(const SmallPtrSetImpl& that) { +SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage, + const SmallPtrSetImpl& that) { + SmallArray = SmallStorage; + // If we're becoming small, prepare to insert into our stack space if (that.isSmall()) { - CurArray = &SmallArray[0]; + CurArray = SmallArray; // Otherwise, allocate new heap space (unless we were the same size) } else { CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1)); @@ -197,7 +200,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { if (RHS.isSmall()) { if (!isSmall()) free(CurArray); - CurArray = &SmallArray[0]; + CurArray = SmallArray; // Otherwise, allocate new heap space (unless we were the same size) } else if (CurArraySize != RHS.CurArraySize) { if (isSmall()) diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp index 6821382..2e17af8 100644 --- a/lib/Support/SmallVector.cpp +++ b/lib/Support/SmallVector.cpp @@ -21,15 +21,18 @@ void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { size_t NewCapacityInBytes = 2 * capacity_in_bytes(); if (NewCapacityInBytes < MinSizeInBytes) NewCapacityInBytes = 
MinSizeInBytes; - void *NewElts = operator new(NewCapacityInBytes); - - // Copy the elements over. No need to run dtors on PODs. - memcpy(NewElts, this->BeginX, CurSizeBytes); - - // If this wasn't grown from the inline copy, deallocate the old space. - if (!this->isSmall()) - operator delete(this->BeginX); - + + void *NewElts; + if (this->isSmall()) { + NewElts = malloc(NewCapacityInBytes); + + // Copy the elements over. No need to run dtors on PODs. + memcpy(NewElts, this->BeginX, CurSizeBytes); + } else { + // If this wasn't grown from the inline copy, grow the allocated space. + NewElts = realloc(this->BeginX, NewCapacityInBytes); + } + this->EndX = (char*)NewElts+CurSizeBytes; this->BeginX = NewElts; this->CapacityX = (char*)this->BeginX + NewCapacityInBytes; diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 784b77c..44ee177 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -236,11 +236,13 @@ static Timer &getNamedRegionTimer(StringRef Name) { return T; } -NamedRegionTimer::NamedRegionTimer(StringRef Name) - : TimeRegion(getNamedRegionTimer(Name)) {} +NamedRegionTimer::NamedRegionTimer(StringRef Name, + bool Enabled) + : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {} -NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName) - : TimeRegion(NamedGroupedTimers->get(Name, GroupName)) {} +NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName, + bool Enabled) + : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {} //===----------------------------------------------------------------------===// // TimerGroup Implementation diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 9796ca5..6a70449 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -104,6 +104,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Solaris: return "solaris"; case Win32: return "win32"; case Haiku: return "haiku"; + case Minix: return "minix"; } return "<invalid>"; @@ -326,7 +327,9 @@ void Triple::Parse() const { else if (OSName.startswith("win32")) OS = Win32; else if (OSName.startswith("haiku")) - OS = Haiku; + OS = Haiku; + else if (OSName.startswith("minix")) + OS = Minix; else OS = UnknownOS; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 11cf0ec..8054ae6 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -427,10 +427,9 @@ raw_fd_ostream::~raw_fd_ostream() { void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { assert(FD >= 0 && "File already closed."); pos += Size; - ssize_t ret; do { - ret = ::write(FD, Ptr, Size); + ssize_t ret = ::write(FD, Ptr, Size); if (ret < 0) { // If it's a recoverable error, swallow it and retry the write. @@ -482,7 +481,7 @@ uint64_t raw_fd_ostream::seek(uint64_t off) { } size_t raw_fd_ostream::preferred_buffer_size() const { -#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_MINIX) +#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix) // Windows and Minix have no st_blksize. assert(FD >= 0 && "File not yet open!"); struct stat statbuf; @@ -496,8 +495,9 @@ size_t raw_fd_ostream::preferred_buffer_size() const { return 0; // Return the preferred block size. 
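The grow_pod rewrite above splits growth into two cases: the first escape from the inline buffer has to malloc and copy, since stack storage cannot be realloc'd, while later growths hand the block to realloc, which can often extend it in place and skip the copy. The strategy in isolation (error handling elided):

#include <cstdlib>
#include <cstring>

// Returns the (possibly moved) start of a buffer grown to NewCap bytes.
// Only valid for POD contents: bytes are moved with no constructors run.
static void *GrowPOD(void *Begin, std::size_t UsedBytes, std::size_t NewCap,
                     bool IsInline) {
  if (IsInline) {
    void *Heap = std::malloc(NewCap);    // first heap allocation
    std::memcpy(Heap, Begin, UsedBytes);
    return Heap;
  }
  return std::realloc(Begin, NewCap);    // may grow in place, no copy
}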
return statbuf.st_blksize; -#endif +#else return raw_ostream::preferred_buffer_size(); +#endif } raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp index bad427a..139e3be 100644 --- a/lib/System/Disassembler.cpp +++ b/lib/System/Disassembler.cpp @@ -44,33 +44,29 @@ std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length, uint64_t pc) { std::stringstream res; -#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__) +#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \ + && USE_UDIS86 unsigned bits; # if defined(__i386__) bits = 32; # else bits = 64; # endif - -# if USE_UDIS86 + ud_t ud_obj; - + ud_init(&ud_obj); ud_set_input_buffer(&ud_obj, start, length); ud_set_mode(&ud_obj, bits); ud_set_pc(&ud_obj, pc); ud_set_syntax(&ud_obj, UD_SYN_ATT); - + res << std::setbase(16) << std::setw(bits/4); - + while (ud_disassemble(&ud_obj)) { res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n"; } -# else - res << "No disassembler available. See configure help for options.\n"; -# endif - #else res << "No disassembler available. See configure help for options.\n"; #endif diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp index 6844530..1235257 100644 --- a/lib/System/Path.cpp +++ b/lib/System/Path.cpp @@ -136,26 +136,23 @@ sys::IdentifyFileType(const char *magic, unsigned length) { bool Path::isArchive() const { - if (canRead()) - return hasMagicNumber("!<arch>\012"); - return false; + return hasMagicNumber("!<arch>\012"); } bool Path::isDynamicLibrary() const { - if (canRead()) { - std::string Magic; - if (getMagicNumber(Magic, 64)) - switch (IdentifyFileType(Magic.c_str(), - static_cast<unsigned>(Magic.length()))) { - default: return false; - case Mach_O_FixedVirtualMemorySharedLib_FileType: - case Mach_O_DynamicallyLinkedSharedLib_FileType: - case Mach_O_DynamicallyLinkedSharedLibStub_FileType: - case ELF_SharedObject_FileType: - case COFF_FileType: return true; - } - } + std::string Magic; + if (getMagicNumber(Magic, 64)) + switch (IdentifyFileType(Magic.c_str(), + static_cast<unsigned>(Magic.length()))) { + default: return false; + case Mach_O_FixedVirtualMemorySharedLib_FileType: + case Mach_O_DynamicallyLinkedSharedLib_FileType: + case Mach_O_DynamicallyLinkedSharedLibStub_FileType: + case ELF_SharedObject_FileType: + case COFF_FileType: return true; + } + return false; } diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index 74596dc..bc104a3 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -421,10 +421,8 @@ bool Path::getMagicNumber(std::string &Magic, unsigned len) const { return false; ssize_t bytes_read = ::read(fd, Buf, len); ::close(fd); - if (ssize_t(len) != bytes_read) { - Magic.clear(); + if (ssize_t(len) != bytes_read) return false; - } Magic.assign(Buf, len); return true; } @@ -890,14 +888,19 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { #else // Okay, looks like we have to do it all by our lonesome. static unsigned FCounter = 0; - unsigned offset = path.size() + 1; - while ( FCounter < 999999 && exists()) { - sprintf(FNBuffer+offset,"%06u",++FCounter); + // Try to initialize with unique value. 
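The raw_fd_ostream::preferred_buffer_size change above turns a stray #endif into a proper #if/#else/#endif, so the base-class fallback is compiled only on platforms without st_blksize instead of sitting as unreachable code after the POSIX return. The corrected shape, with a hypothetical HAVE_ST_BLKSIZE macro standing in for the real !_MSC_VER && !__MINGW32__ && !__minix test:

#include <sys/stat.h>
#include <cstddef>

// Sketch of the fixed structure: exactly one of the two branches is
// compiled, and each branch returns on its own.
static std::size_t PreferredBufferSize(int FD, std::size_t Fallback) {
#if HAVE_ST_BLKSIZE
  struct stat statbuf;
  if (fstat(FD, &statbuf) != 0)
    return 0;                   // size unknown; caller treats 0 as "none"
  return statbuf.st_blksize;    // the filesystem's preferred block size
#else
  return Fallback;              // platforms without st_blksize
#endif
}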
+ if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8; + char* pos = strstr(FNBuffer, "XXXXXX"); + do { + if (++FCounter > 0xFFFFFF) { + return MakeErrMsg(ErrMsg, + path + ": can't make unique filename: too many files"); + } + sprintf(pos, "%06X", FCounter); path = FNBuffer; - } - if (FCounter > 999999) - return MakeErrMsg(ErrMsg, - path + ": can't make unique filename: too many files"); + } while (exists()); + // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit + // LLVM. #endif return false; } diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index 358415f..67018de 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -310,7 +310,7 @@ Program::Wait(unsigned secondsToWait, // fact of having a handler at all causes the wait below to return with EINTR, // unlike if we used SIG_IGN. if (secondsToWait) { -#ifndef __HAIKU__ +#if !defined(__HAIKU__) && !defined(__minix) Act.sa_sigaction = 0; #endif Act.sa_handler = TimeOutHandler; diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 9548816..1e74647 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -111,6 +111,14 @@ static void UnregisterHandlers() { } +/// RemoveFilesToRemove - Process the FilesToRemove list. This function +/// should be called with the SignalsMutex lock held. +static void RemoveFilesToRemove() { + while (!FilesToRemove.empty()) { + FilesToRemove.back().eraseFromDisk(true); + FilesToRemove.pop_back(); + } +} // SignalHandler - The signal handler that runs. static RETSIGTYPE SignalHandler(int Sig) { @@ -126,10 +134,7 @@ static RETSIGTYPE SignalHandler(int Sig) { sigprocmask(SIG_UNBLOCK, &SigMask, 0); SignalsMutex.acquire(); - while (!FilesToRemove.empty()) { - FilesToRemove.back().eraseFromDisk(true); - FilesToRemove.pop_back(); - } + RemoveFilesToRemove(); if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) { if (InterruptFunction) { @@ -153,7 +158,9 @@ static RETSIGTYPE SignalHandler(int Sig) { } void llvm::sys::RunInterruptHandlers() { - SignalHandler(SIGINT); + SignalsMutex.acquire(); + RemoveFilesToRemove(); + SignalsMutex.release(); } void llvm::sys::SetInterruptFunction(void (*IF)()) { diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc index 5a0052f..379527d 100644 --- a/lib/System/Win32/Path.inc +++ b/lib/System/Win32/Path.inc @@ -281,12 +281,6 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { // FIXME: the above set of functions don't map to Windows very well. -bool -Path::isRootDirectory() const { - size_t len = path.size(); - return len > 0 && path[len-1] == '/'; -} - StringRef Path::getDirname() const { return getDirnameCharSep(path, "/"); } diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc index a3a393c..d6db71b 100644 --- a/lib/System/Win32/Signals.inc +++ b/lib/System/Win32/Signals.inc @@ -283,7 +283,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { #ifdef _MSC_VER if (ExitOnUnhandledExceptions) - _exit(-3); + _exit(-3); #endif // Allow dialog box to pop up allowing choice to start debugger. diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index ae7ae59..14825a7 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -90,10 +90,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } -/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model -/// operations involving sub-registers. 
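The makeUnique fallback above now seeds its static counter from the process id and stamps six uppercase hex digits over the "XXXXXX" template until an unused name turns up; as the added comment concedes, the names stay guessable, so this is not safe against a hostile temp directory. A standalone sketch under those assumptions (Exists is supplied by the caller):

#include <cstdio>
#include <cstring>
#include <unistd.h>

// FNBuffer must contain the literal template "XXXXXX"; since sprintf
// NUL-terminates, the template should sit at the end of the name.
// Returns false if the template is missing or the 24-bit space runs out.
static bool MakeUniqueName(char *FNBuffer, bool (*Exists)(const char *)) {
  static unsigned FCounter = 0;
  if (FCounter == 0)
    FCounter = ((unsigned)getpid() & 0xFFFF) << 8;  // per-process start
  char *Pos = std::strstr(FNBuffer, "XXXXXX");
  if (!Pos)
    return false;
  do {
    if (++FCounter > 0xFFFFFF)
      return false;                    // too many files
    std::sprintf(Pos, "%06X", FCounter);
  } while (Exists(FNBuffer));
  return true;
}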
-bool ModelWithRegSequence(); - FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index e68354a..d316b13 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -520,6 +520,70 @@ namespace ARM_AM { // This is stored in two operands [regaddr, align]. The first is the // address register. The second operand is the value of the alignment // specifier to use or zero if no explicit alignment. + // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific + // instruction. + + //===--------------------------------------------------------------------===// + // NEON Modified Immediates + //===--------------------------------------------------------------------===// + // + // Several NEON instructions (e.g., VMOV) take a "modified immediate" + // vector operand, where a small immediate encoded in the instruction + // specifies a full NEON vector value. These modified immediates are + // represented here as encoded integers. The low 8 bits hold the immediate + // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold + // the "Cmode" field of the instruction. The interfaces below treat the + // Op and Cmode values as a single 5-bit value. + + static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { + return (OpCmode << 8) | Val; + } + static inline unsigned getNEONModImmOpCmode(unsigned ModImm) { + return (ModImm >> 8) & 0x1f; + } + static inline unsigned getNEONModImmVal(unsigned ModImm) { + return ModImm & 0xff; + } + + /// decodeNEONModImm - Decode a NEON modified immediate value into the + /// element value and the element size in bits. (If the element size is + /// smaller than the vector, it is splatted into all the elements.) 
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { + unsigned OpCmode = getNEONModImmOpCmode(ModImm); + unsigned Imm8 = getNEONModImmVal(ModImm); + uint64_t Val = 0; + + if (OpCmode == 0xe) { + // 8-bit vector elements + Val = Imm8; + EltBits = 8; + } else if ((OpCmode & 0xc) == 0x8) { + // 16-bit vector elements + unsigned ByteNum = (OpCmode & 0x6) >> 1; + Val = Imm8 << (8 * ByteNum); + EltBits = 16; + } else if ((OpCmode & 0x8) == 0) { + // 32-bit vector elements, zero with one byte set + unsigned ByteNum = (OpCmode & 0x6) >> 1; + Val = Imm8 << (8 * ByteNum); + EltBits = 32; + } else if ((OpCmode & 0xe) == 0xc) { + // 32-bit vector elements, one byte with low bits set + unsigned ByteNum = 1 + (OpCmode & 0x1); + Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum))); + EltBits = 32; + } else if (OpCmode == 0x1e) { + // 64-bit vector elements + for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { + if ((ModImm >> ByteNum) & 1) + Val |= (uint64_t)0xff << (8 * ByteNum); + } + EltBits = 64; + } else { + assert(false && "Unsupported NEON immediate"); + } + return Val; + } } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2528854..49c16f3 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -56,7 +56,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); - unsigned TSFlags = MI->getDesc().TSFlags; + uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { default: return NULL; @@ -199,9 +199,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, bool ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -227,8 +227,9 @@ ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Insert the spill to the stack frame. The register is killed at the spill // + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); storeRegToStackSlot(MBB, MI, Reg, isKill, - CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI); + CSI[i].getFrameIdx(), RC, TRI); } return true; } @@ -347,10 +348,8 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; - + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); int BOpc = !AFI->isThumbFunction() ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); @@ -364,17 +363,17 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (FBB == 0) { if (Cond.empty()) // Unconditional branch? 
- BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB); + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); else - BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); return 1; } // Two-way conditional branch. - BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); - BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); return 2; } @@ -487,7 +486,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // Basic size info comes from the TSFlags field. const TargetInstrDesc &TID = MI->getDesc(); - unsigned TSFlags = TID.TSFlags; + uint64_t TSFlags = TID.TSFlags; unsigned Opc = MI->getOpcode(); switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { @@ -524,11 +523,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 10; case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: - return 24; + return 20; case ARM::tInt_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: - return 14; + return 12; case ARM::BR_JTr: case ARM::BR_JTm: case ARM::BR_JTadd: @@ -595,6 +594,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, return true; } case ARM::MOVr: + case ARM::MOVr_TC: case ARM::tMOVr: case ARM::tMOVgpr2tgpr: case ARM::tMOVtgpr2gpr: @@ -693,75 +693,44 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool -ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. - if (DestRC == ARM::tGPRRegisterClass) - DestRC = ARM::GPRRegisterClass; - if (SrcRC == ARM::tGPRRegisterClass) - SrcRC = ARM::GPRRegisterClass; - - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. - if (DestRC == ARM::DPR_8RegisterClass) - DestRC = ARM::DPR_VFP2RegisterClass; - if (SrcRC == ARM::DPR_8RegisterClass) - SrcRC = ARM::DPR_VFP2RegisterClass; - - // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. - if (DestRC == ARM::QPR_VFP2RegisterClass || - DestRC == ARM::QPR_8RegisterClass) - DestRC = ARM::QPRRegisterClass; - if (SrcRC == ARM::QPR_VFP2RegisterClass || - SrcRC == ARM::QPR_8RegisterClass) - SrcRC = ARM::QPRRegisterClass; - - // Allow QQPR / QQPR_VFP2 cross-class copies. - if (DestRC == ARM::QQPR_VFP2RegisterClass) - DestRC = ARM::QQPRRegisterClass; - if (SrcRC == ARM::QQPR_VFP2RegisterClass) - SrcRC = ARM::QQPRRegisterClass; - - // Disallow copies of unequal sizes. - if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) - return false; - - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::SPRRegisterClass) - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg) - .addReg(SrcReg)); - else - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), - DestReg).addReg(SrcReg))); - } else { - unsigned Opc; - - if (DestRC == ARM::SPRRegisterClass) - Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS); - else if (DestRC == ARM::DPRRegisterClass) - Opc = ARM::VMOVD; - else if (DestRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass) - // Always use neon reg-reg move if source or dest is NEON-only regclass. 
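Returning to the decodeNEONModImm helper added in ARMAddressingModes.h above: it reverses createNEONModImm's packing, using bits 12-8 (Op and Cmode taken together) to pick the element size and expansion rule, and the low 8 bits as payload. A worked check, assuming the in-tree lib/Target/ARM include path:

#include <cassert>
#include <stdint.h>
#include "ARMAddressingModes.h"

// OpCmode 0x1e selects 64-bit elements in which every set bit of Imm8
// expands to a full 0xff byte, so Imm8 = 0x81 (bits 0 and 7) decodes to
// 0xff000000000000ff.
static void TestNEONModImm() {
  unsigned ModImm = llvm::ARM_AM::createNEONModImm(0x1e, 0x81);
  unsigned EltBits = 0;
  uint64_t Val = llvm::ARM_AM::decodeNEONModImm(ModImm, EltBits);
  assert(EltBits == 64 && Val == 0xff000000000000ffULL);
  (void)Val; (void)EltBits;  // keep NDEBUG builds warning-free
}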
- Opc = ARM::VMOVDneon; - else if (DestRC == ARM::QPRRegisterClass) - Opc = ARM::VMOVQ; - else if (DestRC == ARM::QQPRRegisterClass) - Opc = ARM::VMOVQQ; - else if (DestRC == ARM::QQQQPRRegisterClass) - Opc = ARM::VMOVQQQQ; - else - return false; - - AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg)); +void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool GPRDest = ARM::GPRRegClass.contains(DestReg); + bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); + + if (GPRDest && GPRSrc) { + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)))); + return; } - return true; + bool SPRDest = ARM::SPRRegClass.contains(DestReg); + bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); + + unsigned Opc; + if (SPRDest && SPRSrc) + Opc = ARM::VMOVS; + else if (GPRDest && SPRSrc) + Opc = ARM::VMOVRS; + else if (SPRDest && GPRSrc) + Opc = ARM::VMOVSR; + else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD; + else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQ; + else if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQQ; + else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQQQQ; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ) + AddDefaultPred(MIB); } static const @@ -795,30 +764,34 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) RC = ARM::GPRRegisterClass; - if (RC == ARM::GPRRegisterClass) { + switch (RC->getID()) { + case ARM::GPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { + break; + case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::DPRRegisterClass || - RC == ARM::DPR_VFP2RegisterClass || - RC == ARM::DPR_8RegisterClass) { + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) { + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: // FIXME: Neon instructions should support predicates if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); } else { @@ -828,12 +801,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32)) - .addFrameIndex(FI).addImm(128); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q)) + .addFrameIndex(FI).addImm(16); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -850,8 +825,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); } - } else { - assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + break; + case ARM::QQQQPRRegClassID: { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) @@ -865,6 +840,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); + break; + } + default: + llvm_unreachable("Unknown regclass!"); } } @@ -886,26 +865,30 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) RC = ARM::GPRRegisterClass; - if (RC == ARM::GPRRegisterClass) { + switch (RC->getID()) { + case ARM::GPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { + break; + case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::DPRRegisterClass || - RC == ARM::DPR_VFP2RegisterClass || - RC == ARM::DPR_8RegisterClass) { + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) { + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) @@ -913,14 +896,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32)); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q)); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO)); + AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO)); } else { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) @@ -932,21 +917,25 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); } - } else { - assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + break; + case ARM::QQQQPRRegClassID: { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + 
.addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + break; + } + default: + llvm_unreachable("Unknown regclass!"); } } @@ -960,223 +949,6 @@ ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, return &*MIB; } -MachineInstr *ARMBaseInstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { - // If it is updating CPSR, then it cannot be folded. - if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead()) - return NULL; - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (Opc == ARM::MOVr) - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - else // ARM::t2MOVr - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - if (Opc == ARM::MOVr) - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), DstSubReg) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - else // ARM::t2MOVr - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::tMOVgpr2gpr || - Opc == ARM::tMOVtgpr2gpr || - Opc == ARM::tMOVgpr2tgpr) { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) - 
.addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); - } - } else if (Opc == ARM::VMOVS) { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI) - .addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::VMOVQ) { - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (MFI.getObjectAlignment(FI) >= 16 && - getRegisterInfo().canRealignStack(MF)) { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q)) - .addFrameIndex(FI).addImm(128) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addImm(Pred).addReg(PredReg); - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) - .addImm(Pred).addReg(PredReg); - } - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - if (MFI.getObjectAlignment(FI) >= 16 && - getRegisterInfo().canRealignStack(MF)) { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q)) - 
.addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg); - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) - .addImm(Pred).addReg(PredReg); - } - } - } - - return NewMI; -} - -MachineInstr* -ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - // FIXME - return 0; -} - -bool -ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { - // If it is updating CPSR, then it cannot be folded. - return MI->getOperand(4).getReg() != ARM::CPSR || - MI->getOperand(4).isDead(); - } else if (Opc == ARM::tMOVgpr2gpr || - Opc == ARM::tMOVtgpr2gpr || - Opc == ARM::tMOVgpr2tgpr) { - return true; - } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD || - Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { - return true; - } - - // FIXME: VMOVQQ and VMOVQQQQ? - - return false; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { @@ -1211,17 +983,12 @@ reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { - if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = TRI->getSubReg(DestReg, SubIdx); - SubIdx = 0; - } - + const TargetRegisterInfo &TRI) const { unsigned Opcode = Orig->getOpcode(); switch (Opcode) { default: { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); + MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); break; } @@ -1237,9 +1004,6 @@ reMaterialize(MachineBasicBlock &MBB, break; } } - - MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); } MachineInstr * @@ -1291,6 +1055,165 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } +/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to +/// determine if two loads are loading from the same base address. It should +/// only return true if the base pointers are the same and the only difference +/// between the two addresses is the offset. It also returns the offsets by +/// reference. +bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, + int64_t &Offset2) const { + // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false; + + if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) + return false; + + switch (Load1->getMachineOpcode()) { + default: + return false; + case ARM::LDR: + case ARM::LDRB: + case ARM::LDRD: + case ARM::LDRH: + case ARM::LDRSB: + case ARM::LDRSH: + case ARM::VLDRD: + case ARM::VLDRS: + case ARM::t2LDRi8: + case ARM::t2LDRDi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRi12: + case ARM::t2LDRSHi12: + break; + } + + switch (Load2->getMachineOpcode()) { + default: + return false; + case ARM::LDR: + case ARM::LDRB: + case ARM::LDRD: + case ARM::LDRH: + case ARM::LDRSB: + case ARM::LDRSH: + case ARM::VLDRD: + case ARM::VLDRS: + case ARM::t2LDRi8: + case ARM::t2LDRDi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRi12: + case ARM::t2LDRSHi12: + break; + } + + // Check if base addresses and chain operands match. + if (Load1->getOperand(0) != Load2->getOperand(0) || + Load1->getOperand(4) != Load2->getOperand(4)) + return false; + + // Index should be Reg0. + if (Load1->getOperand(3) != Load2->getOperand(3)) + return false; + + // Determine the offsets. + if (isa<ConstantSDNode>(Load1->getOperand(1)) && + isa<ConstantSDNode>(Load2->getOperand(1))) { + Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue(); + Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue(); + return true; + } + + return false; +} + +/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to +/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should +/// be scheduled together. On some targets if two loads are loading from +/// addresses in the same cache line, it's better if they are scheduled +/// together. This function takes two integers that represent the load offsets +/// from the common base address. It returns true if it decides it's desirable +/// to schedule the two loads together. "NumLoads" is the number of loads that +/// have already been scheduled after Load1. +bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + // Don't worry about Thumb: just ARM and Thumb2. + if (Subtarget.isThumb1Only()) return false; + + assert(Offset2 > Offset1); + + if ((Offset2 - Offset1) / 8 > 64) + return false; + + if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + return false; // FIXME: overly conservative? + + // Four loads in a row should be sufficient. + if (NumLoads >= 3) + return false; + + return true; +} + +bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Treat the start of the IT block as a scheduling boundary, but schedule + // t2IT along with all instructions following it. + // FIXME: This is a big hammer.
But the alternative is to add all potential + // true and anti dependencies to IT block instructions as implicit operands + // to the t2IT instruction. The added compile time and complexity does not + // seem worth it. + MachineBasicBlock::const_iterator I = MI; + // Make sure to skip any dbg_value instructions + while (++I != MBB->end() && I->isDebugValue()) + ; + if (I != MBB->end() && I->getOpcode() == ARM::t2IT) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + if (MI->definesRegister(ARM::SP)) + return true; + + return false; +} + +bool ARMBaseInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const { + if (!NumInstrs) + return false; + if (Subtarget.getCPUString() == "generic") + // Generic (and overly aggressive) if-conversion limits for testing. + return NumInstrs <= 10; + else if (Subtarget.hasV7Ops()) + return NumInstrs <= 3; + return NumInstrs <= 2; +} + +bool ARMBaseInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, + MachineBasicBlock &FMBB, unsigned NumF) const { + return NumT && NumF && NumT <= 2 && NumF <= 2; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b566271..89a2db7 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -116,11 +116,25 @@ namespace ARMII { // Thumb format ThumbFrm = 24 << FormShift, - // NEON format - NEONFrm = 25 << FormShift, - NEONGetLnFrm = 26 << FormShift, - NEONSetLnFrm = 27 << FormShift, - NEONDupFrm = 28 << FormShift, + // Miscellaneous format + MiscFrm = 25 << FormShift, + + // NEON formats + NGetLnFrm = 26 << FormShift, + NSetLnFrm = 27 << FormShift, + NDupFrm = 28 << FormShift, + NLdStFrm = 29 << FormShift, + N1RegModImmFrm= 30 << FormShift, + N2RegFrm = 31 << FormShift, + NVCVTFrm = 32 << FormShift, + NVDupLnFrm = 33 << FormShift, + N2RegVShLFrm = 34 << FormShift, + N2RegVShRFrm = 35 << FormShift, + N3RegFrm = 36 << FormShift, + N3RegVShFrm = 37 << FormShift, + NVExtFrm = 38 << FormShift, + NVMulSLFrm = 39 << FormShift, + NVTBLFrm = 40 << FormShift, //===------------------------------------------------------------------===// // Misc flags.
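The form tags above are packed into each instruction's TSFlags word next to the other encoding fields, and consumers recover them with a single mask; that is how the code emitter later in this patch dispatches on instruction form (the switch on MI.getDesc().TSFlags & ARMII::FormMask in ARMCodeEmitter.cpp). A minimal standalone sketch of that packing scheme follows; the FormShift and FormMask values are assumptions for illustration, not the real constants from this header.

#include <cstdint>
#include <cstdio>

// Illustrative packing of form tags into TSFlags. FormShift/FormMask are
// assumed values; the real constants live earlier in ARMBaseInstrInfo.h.
enum {
  FormShift = 7,
  FormMask  = 0x3f << FormShift,   // six bits: room for tags up to 63
  MiscFrm   = 25 << FormShift,
  NGetLnFrm = 26 << FormShift,
  NSetLnFrm = 27 << FormShift
};

// Recover the form tag from a packed TSFlags word, mirroring the
// emitInstruction dispatch.
static const char *formName(uint64_t TSFlags) {
  switch (TSFlags & FormMask) {
  case MiscFrm:   return "Misc";
  case NGetLnFrm: return "NEON get-lane";
  case NSetLnFrm: return "NEON set-lane";
  default:        return "other";
  }
}

int main() {
  uint64_t Flags = NGetLnFrm | 0x1f;  // low bits stand in for other fields
  std::printf("%s\n", formName(Flags));
  return 0;
}

Keeping every format in one field means adding the new NEON forms only widens the mask; the dispatch stays a single mask-and-switch rather than a chain of class checks.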
@@ -213,7 +227,8 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -258,12 +273,10 @@ public: virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -283,29 +296,51 @@ public: const MDNode *MDPtr, DebugLoc DL) const; - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const; - virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1) const; + + /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to + /// determine if two loads are loading from the same base address. It should + /// only return true if the base pointers are the same and the only + /// difference between the two addresses is the offset. It also returns the + /// offsets by reference. + virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, int64_t &Offset2)const; + + /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should + /// be scheduled together. On some targets if two loads are loading from + /// addresses in the same cache line, it's better if they are scheduled + /// together. This function takes two integers that represent the load offsets + /// from the common base address. It returns true if it decides it's desirable + /// to schedule the two loads together. "NumLoads" is the number of loads that + /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const; + + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT, + MachineBasicBlock &FMBB,unsigned NumF) const; + + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const { + return NumInstrs && NumInstrs == 1; + } }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 82458d2..182bd99 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -170,56 +170,6 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; } -const TargetRegisterClass* const * -ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, - &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={ - &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass, - &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - if (STI.isThumb1Only()) { - return STI.isTargetDarwin() - ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses; - } - return STI.isTargetDarwin() - ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; -} - BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { // FIXME: avoid re-calculating this every time.
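The getCalleeSavedRegClasses tables deleted above had to be kept in lockstep with the register lists by hand; the hunks below switch to asking the register class directly whether it contains each callee-saved register. A standalone sketch of that membership-test pattern, with stand-in types (RegClass, the register numbering, and the list contents are illustrative, not the real ARM definitions):

#include <cstdio>

// Stand-in for TargetRegisterClass::contains(): a contiguous number range.
struct RegClass {
  unsigned Lo, Hi;
  bool contains(unsigned Reg) const { return Reg >= Lo && Reg <= Hi; }
};

int main() {
  static const RegClass GPR = {1, 16};               // assumed GPR range
  static const unsigned CSRegs[] = {4, 5, 6, 20, 0}; // 0 terminates the list
  // Walk the null-terminated callee-saved list, as the new loop does, and
  // filter by class membership instead of indexing a parallel class array.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (!GPR.contains(Reg))
      continue;  // e.g. a D register: no CS1/CS2 bookkeeping needed
    std::printf("callee-saved GPR: %u\n", Reg);
  }
  return 0;
}

Dropping the parallel array removes one way for the two lists to drift out of sync when a register is added or reordered.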
@@ -352,7 +302,7 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, } bool -ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC, +ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, SmallVectorImpl<unsigned> &SubIndices, unsigned &NewSubIdx) const { @@ -724,6 +674,15 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { I != E; ++I) { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { if (!I->getOperand(i).isFI()) continue; + + // When using ADDri to get the address of a stack object, 255 is the + // largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == ARM::ADDri) { + Limit = std::min(Limit, (1U << 8) - 1); + break; + } + + // Otherwise check the addressing mode. switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: @@ -765,6 +724,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. @@ -780,7 +740,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. const unsigned *CSRegs = getCalleeSavedRegs(); - const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -798,50 +757,50 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } - if (CSRegClasses[i] == ARM::GPRRegisterClass || - CSRegClasses[i] == ARM::tGPRRegisterClass) { - if (Spilled) { - NumGPRSpills++; + if (!ARM::GPRRegisterClass->contains(Reg)) + continue; - if (!STI.isTargetDarwin()) { - if (Reg == ARM::LR) - LRSpilled = true; - CS1Spilled = true; - continue; - } + if (Spilled) { + NumGPRSpills++; - // Keep track if LR and any of R4, R5, R6, and R7 is spilled. - switch (Reg) { - case ARM::LR: + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) LRSpilled = true; - // Fallthrough - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - CS1Spilled = true; - break; - default: - break; - } - } else { - if (!STI.isTargetDarwin()) { - UnspilledCS1GPRs.push_back(Reg); - continue; - } + CS1Spilled = true; + continue; + } - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - UnspilledCS1GPRs.push_back(Reg); - break; - default: - UnspilledCS2GPRs.push_back(Reg); - break; - } + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. 
+ switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; } } } @@ -862,9 +821,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // offset, make sure a register (or a spill slot) is available for the // register scavenger. Note that if we're indexing off the frame pointer, the // effective stack size is 4 bytes larger since the FP points to the stack - // slot of the previous FP. - bool BigStack = RS && - estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF); + // slot of the previous FP. Also, if we have variable sized objects in the + // function, stack slot references will often be negative, and some of + // our instructions are positive-offset only, so conservatively consider + // that case to want a spill slot (or register) as well. + // FIXME: We could add logic to be more precise about negative offsets + // and which instructions will need a scratch register for them. Is it + // worth the effort and added fragility? + bool BigStack = + (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >= + estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects(); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { @@ -957,7 +923,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. const TargetRegisterClass *RC = ARM::GPRRegisterClass; - MachineFrameInfo *MFI = MF.getFrameInfo(); RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); @@ -1622,6 +1587,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); assert(MBBI->getDesc().isReturn() && "Can only insert epilog into returning blocks"); + unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1696,6 +1662,39 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size()); } + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || + RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + + // Jump to label or value in register. + if (RetOpcode == ARM::TCRETURNdi) { + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == ARM::TCRETURNdiND) { + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == ARM::TCRETURNri) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). 
+ addReg(JumpTarget.getReg(), RegState::Kill); + } else if (RetOpcode == ARM::TCRETURNriND) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + } + if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 2c9c82d..f7ee0d5 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -69,9 +69,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; /// getMatchingSuperRegClass - Return a subclass of the specified register @@ -81,14 +78,15 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; - /// canCombinedSubRegIndex - Given a register class and a list of sub-register - /// indices, return true if it's possible to combine the sub-register indices - /// into one that corresponds to a larger sub-register. Return the new sub- - /// register index by reference. Note the new index by be zero if the given - /// sub-registers combined to form the whole register. - virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const; + /// canCombineSubRegIndices - Given a register class and a list of + /// subregister indices, return true if it's possible to combine the + /// subregister indices into one that corresponds to a larger + /// subregister. Return the new subregister index by reference. Note the + /// new index may be zero if the given subregisters can be combined to + /// form the whole register. 
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, + SmallVectorImpl<unsigned> &SubIndices, + unsigned &NewSubIdx) const; const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; @@ -150,8 +148,8 @@ public: virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, FrameIndexValue *Value = NULL, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index f2730fc..7895cb0 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -55,6 +55,7 @@ namespace { const std::vector<MachineConstantPoolEntry> *MCPEs; const std::vector<MachineJumpTableEntry> *MJTEs; bool IsPIC; + bool IsThumb; void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); @@ -67,8 +68,8 @@ namespace { : MachineFunctionPass(&ID), JTI(0), II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -139,6 +140,12 @@ namespace { void emitMiscInstruction(const MachineInstr &MI); + void emitNEONLaneInstruction(const MachineInstr &MI); + void emitNEONDupInstruction(const MachineInstr &MI); + void emitNEON1RegModImmInstruction(const MachineInstr &MI); + void emitNEON2RegInstruction(const MachineInstr &MI); + void emitNEON3RegInstruction(const MachineInstr &MI); + /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO); @@ -147,7 +154,8 @@ namespace { } /// getMovi32Value - Return binary encoding of operand for movw/movt. If the - /// machine operand requires relocation, record the relocation and return zero. + /// machine operand requires relocation, record the relocation and return + /// zero. unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, unsigned Reloc); unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, @@ -193,6 +201,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { MJTEs = 0; if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; + IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction(); JTI->Initialize(MF, IsPIC); MMI = &getAnalysis<MachineModuleInfo>(); MCE.setModuleInfo(MMI); @@ -347,7 +356,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { MCE.processDebugLoc(MI.getDebugLoc(), true); - NumEmitted++; // Keep track of the # of mi's emitted + ++NumEmitted; // Keep track of the # of mi's emitted switch (MI.getDesc().TSFlags & ARMII::FormMask) { default: { llvm_unreachable("Unhandled instruction encoding format!"); @@ -407,6 +416,23 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { case ARMII::VFPMiscFrm: emitMiscInstruction(MI); break; + // NEON instructions. 
+ case ARMII::NGetLnFrm: + case ARMII::NSetLnFrm: + emitNEONLaneInstruction(MI); + break; + case ARMII::NDupFrm: + emitNEONDupInstruction(MI); + break; + case ARMII::N1RegModImmFrm: + emitNEON1RegModImmInstruction(MI); + break; + case ARMII::N2RegFrm: + emitNEON2RegInstruction(MI); + break; + case ARMII::N3RegFrm: + emitNEON3RegInstruction(MI); + break; } MCE.processDebugLoc(MI.getDebugLoc(), false); } @@ -1539,4 +1565,144 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { emitWordLE(Binary); } +static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegD = ARMRegisterInfo::getRegisterNumbering(RegD); + Binary |= (RegD & 0xf) << ARMII::RegRdShift; + Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; + return Binary; +} + +static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegN = ARMRegisterInfo::getRegisterNumbering(RegN); + Binary |= (RegN & 0xf) << ARMII::RegRnShift; + Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; + return Binary; +} + +static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegM = ARMRegisterInfo::getRegisterNumbering(RegM); + Binary |= (RegM & 0xf); + Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; + return Binary; +} + +/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON +/// data-processing instruction to the corresponding Thumb encoding. +static unsigned convertNEONDataProcToThumb(unsigned Binary) { + assert((Binary & 0xfe000000) == 0xf2000000 && + "not an ARM NEON data-processing instruction"); + unsigned UBit = (Binary >> 24) & 1; + return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); +} + +void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; + const TargetInstrDesc &TID = MI.getDesc(); + if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { + RegTOpIdx = 0; + RegNOpIdx = 1; + LnOpIdx = 2; + } else { // ARMII::NSetLnFrm + RegTOpIdx = 2; + RegNOpIdx = 0; + LnOpIdx = 3; + } + + // Set the conditional execution predicate + Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); + RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, RegNOpIdx); + + unsigned LaneShift; + if ((Binary & (1 << 22)) != 0) + LaneShift = 0; // 8-bit elements + else if ((Binary & (1 << 5)) != 0) + LaneShift = 1; // 16-bit elements + else + LaneShift = 2; // 32-bit elements + + unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; + unsigned Opc1 = Lane >> 2; + unsigned Opc2 = Lane & 3; + assert((Opc1 & 3) == 0 && "out-of-range lane number operand"); + Binary |= (Opc1 << 21); + Binary |= (Opc2 << 5); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= (IsThumb ? 
ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(1).getReg(); + RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, 0); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd. + Binary |= encodeNEONRd(MI, 0); + // Immediate fields: Op, Cmode, I, Imm3, Imm4 + unsigned Imm = MI.getOperand(1).getImm(); + unsigned Op = (Imm >> 12) & 1; + unsigned Cmode = (Imm >> 8) & 0xf; + unsigned I = (Imm >> 7) & 1; + unsigned Imm3 = (Imm >> 4) & 0x7; + unsigned Imm4 = Imm & 0xf; + Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source register in Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VDUPfdf or VDUPfqf. + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source registers in Dn and Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRn(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VMOVDneon or VMOVQ. + emitWordLE(Binary); +} + #include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 13d8b74..65a3da6 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -337,7 +337,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (CPChange && ++NoCPIters > 30) llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); - + // Clear NewWaterList now. If we split a block for branches, it should // appear as "new water" for the next iteration of constant pool placement. NewWaterList.clear(); @@ -361,8 +361,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // After a while, this might be made debug-only, but it is not expensive. verify(MF); - // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. - // Undo the spill / restore of LR if possible. + // If LR has been forced spilled and no far jump (i.e. BL) has been issued, + // undo the spill / restore of LR if possible. 
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); @@ -407,7 +407,7 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, std::vector<CPEntry> CPEs; CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); - NumCPEs++; + ++NumCPEs; DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i << "\n"); } @@ -418,7 +418,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; - if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. + // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) return false; MachineBasicBlock *NextBB = llvm::next(MBBI); @@ -491,6 +492,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, unsigned MBBSize = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { + if (I->isDebugValue()) + continue; // Add instruction size to MBBSize. MBBSize += TII->GetInstSizeInBytes(I); @@ -722,7 +725,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // correspond to anything in the source. unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); - NumSplit++; + ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. while (!OrigBB->succ_empty()) { @@ -945,7 +948,7 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { if (--CPE->RefCount == 0) { RemoveDeadCPEMI(CPEMI); CPE->CPEMI = NULL; - NumCPEs--; + --NumCPEs; return true; } return false; @@ -1246,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); - NumCPEs++; + ++NumCPEs; BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; // Compensate for .align 2 in thumb mode. @@ -1369,7 +1372,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { BBSizes[MBB->getNumber()] += 2; AdjustBBOffsetsAfter(MBB, 2); HasFarJump = true; - NumUBrFixed++; + ++NumUBrFixed; DEBUG(errs() << " Changed B to long jump " << *MI); @@ -1402,7 +1405,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *BMI = &MBB->back(); bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - NumCBrFixed++; + ++NumCBrFixed; if (BMI != MI) { if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && BMI->getOpcode() == Br.UncondBr) { @@ -1621,7 +1624,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { // constantpool tables? MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); if (MJTI == 0) return false; - + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1658,15 +1661,25 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { continue; unsigned IdxReg = MI->getOperand(1).getReg(); bool IdxRegKill = MI->getOperand(1).isKill(); + + // Scan backwards to find the instruction that defines the base + // register. Due to post-RA scheduling, we can't count on it + // immediately preceding the branch instruction. 
MachineBasicBlock::iterator PrevI = MI; - if (PrevI == MBB->begin()) + MachineBasicBlock::iterator B = MBB->begin(); + while (PrevI != B && !PrevI->definesRegister(BaseReg)) + --PrevI; + + // If for some reason we didn't find it, we can't do anything, so + // just skip this one. + if (!PrevI->definesRegister(BaseReg)) continue; - MachineInstr *AddrMI = --PrevI; + MachineInstr *AddrMI = PrevI; bool OptOk = true; - // Examine the instruction that calculate the jumptable entry address. - // If it's not the one just before the t2BR_JT, we won't delete it, then - // it's not worth doing the optimization. + // Examine the instruction that calculates the jumptable entry address. + // Make sure it only defines the base register and kills any uses + // other than the index register. for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { const MachineOperand &MO = AddrMI->getOperand(k); if (!MO.isReg() || !MO.getReg()) @@ -1683,9 +1696,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { if (!OptOk) continue; - // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want + // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction + // that gave us the initial base register definition. + for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI) + ; + + // The instruction should be a tLEApcrel or t2LEApcrelJT; we want // to delete it as well. - MachineInstr *LeaMI = --PrevI; + MachineInstr *LeaMI = PrevI; if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && LeaMI->getOpcode() != ARM::t2LEApcrelJT) || LeaMI->getOperand(0).getReg() != BaseReg) @@ -1729,7 +1747,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); if (MJTI == 0) return false; - + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1769,7 +1787,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction &MF = *BB->getParent(); - // If it's the destination block is terminated by an unconditional branch, + // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple // heuristic. FIXME: We can definitely improve it. 
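The jump-table change above stops assuming that the address computation sits immediately before the t2BR_JT; instead it scans backwards for the instruction defining the base register, then scans again for the tLEApcrel or t2LEApcrelJT that started the chain. A standalone sketch of the backward scan, with a stand-in Instr type (the index-based block and register numbers are illustrative):

#include <cstdio>
#include <vector>

struct Instr {
  unsigned Def;  // register this instruction defines (0 = none)
  bool definesRegister(unsigned R) const { return Def == R; }
};

// Walk from the branch toward the block start until an instruction defining
// BaseReg is found; report failure if none exists, as the patch does before
// giving up on the optimization.
static int findBaseRegDef(const std::vector<Instr> &MBB, int BranchIdx,
                          unsigned BaseReg) {
  int I = BranchIdx;
  while (I > 0 && !MBB[I].definesRegister(BaseReg))
    --I;
  return MBB[I].definesRegister(BaseReg) ? I : -1;
}

int main() {
  // r3 is defined at index 1; the branch sits at index 3.
  std::vector<Instr> MBB = { {7}, {3}, {9}, {0} };
  std::printf("def of r3 at index %d\n", findBaseRegDef(MBB, 3, 3));
  return 0;
}

The re-check after the loop matters: reaching the block start only means the scan ran out of instructions, not that a definition was found.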
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 6f4eddf..3119b54 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" +#include <cstddef> namespace llvm { diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c87f5d7..9c62597 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -144,13 +144,15 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); + .addReg(EvenDst, + getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); + .addReg(OddDst, + getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); Modified = true; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9baef6b..c84d3ff 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMTargetMachine.h" @@ -35,11 +36,6 @@ using namespace llvm; -static cl::opt<bool> -UseRegSeq("neon-reg-sequence", cl::Hidden, - cl::desc("Use reg_sequence to model ld / st of multiple neon regs"), - cl::init(true)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -147,6 +143,11 @@ private: unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); + /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, + /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be + /// generated to force the table registers to be consecutive. + SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); + /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -173,24 +174,17 @@ private: char ConstraintCode, std::vector<SDValue> &OutOps); - /// PairDRegs - Form a quad register from a pair of D registers. - /// + // Form pairs of consecutive S, D, or Q registers. + SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1); SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); - - /// PairDRegs - Form a quad register pair from a pair of Q registers. - /// SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); - /// QuadDRegs - Form a quad register pair from a quad of D registers. - /// + // Form sequences of 4 consecutive S, D, or Q registers. + SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - - /// QuadQRegs - Form 4 consecutive Q registers. 
- /// SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - /// OctoDRegs - Form 8 consecutive D registers. - /// + // Form sequences of 8 consecutive D registers. SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, SDValue V4, SDValue V5, SDValue V6, SDValue V7); }; @@ -544,10 +538,9 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N, bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset){ // FIXME dl should come from the parent load or store, not the address - DebugLoc dl = Op->getDebugLoc(); if (N.getOpcode() != ISD::ADD) { ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); - if (!NC || NC->getZExtValue() != 0) + if (!NC || !NC->isNullValue()) return false; Base = Offset = N; @@ -788,8 +781,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, if (N.getOpcode() == ISD::ADD) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); + // 8 bits. if (((RHSC & 0x3) == 0) && - ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits. + ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { Base = N.getOperand(0); OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -798,7 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, } else if (N.getOpcode() == ISD::SUB) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); - if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits. + // 8 bits. + if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { Base = N.getOperand(0); OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); return true; @@ -960,22 +955,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } +/// PairSRegs - Form a D register from a pair of S registers. +/// +SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + /// PairDRegs - Form a quad register from a pair of D registers. /// SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); - if (llvm::ModelWithRegSequence()) { - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); - } - SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0); - SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VT, Undef, V0, SubReg0); - return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VT, SDValue(Pair, 0), V1, SubReg1); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } /// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. @@ -988,6 +985,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } +/// QuadSRegs - Form 4 consecutive S registers. 
+/// +SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + /// QuadDRegs - Form 4 consecutive D registers. /// SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, @@ -1088,7 +1098,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - if (!llvm::ModelWithRegSequence() || NumVecs < 2) + if (NumVecs < 2) return VLd; SDValue RegSeq; @@ -1129,24 +1139,17 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, Chain = SDValue(VLd, 2 * NumVecs); // Combine the even and odd subregs to produce the result. - if (llvm::ModelWithRegSequence()) { - if (NumVecs == 1) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); - ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); - } else { - SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, - SDValue(VLd, 0), SDValue(VLd, 1), - SDValue(VLd, 2), SDValue(VLd, 3)), 0); - SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); - SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); - ReplaceUses(SDValue(N, 0), Q0); - ReplaceUses(SDValue(N, 1), Q1); - } + if (NumVecs == 1) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); + ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); - } + SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, + SDValue(VLd, 0), SDValue(VLd, 1), + SDValue(VLd, 2), SDValue(VLd, 3)), 0); + SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); + SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); } } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1169,37 +1172,27 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); - if (llvm::ModelWithRegSequence()) { - SDValue V0 = SDValue(VLdA, 0); - SDValue V1 = SDValue(VLdB, 0); - SDValue V2 = SDValue(VLdA, 1); - SDValue V3 = SDValue(VLdB, 1); - SDValue V4 = SDValue(VLdA, 2); - SDValue V5 = SDValue(VLdB, 2); - SDValue V6 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), - 0) - : SDValue(VLdA, 3); - SDValue V7 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), - 0) - : SDValue(VLdB, 3); - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, - V4, V5, V6, V7), 0); - - // Extract out the 3 / 4 Q registers. 
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, RegSeq); - ReplaceUses(SDValue(N, Vec), Q); - } - } else { - // Combine the even and odd subregs to produce the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); - } + SDValue V0 = SDValue(VLdA, 0); + SDValue V1 = SDValue(VLdB, 0); + SDValue V2 = SDValue(VLdA, 1); + SDValue V3 = SDValue(VLdB, 1); + SDValue V4 = SDValue(VLdA, 2); + SDValue V5 = SDValue(VLdB, 2); + SDValue V6 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) + : SDValue(VLdA, 3); + SDValue V7 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) + : SDValue(VLdB, 3); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, + V4, V5, V6, V7), 0); + + // Extract out the 3 / 4 Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), Q); } } ReplaceUses(SDValue(N, NumVecs), Chain); @@ -1209,7 +1202,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { - assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range"); + assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; @@ -1247,7 +1240,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Align); if (is64BitVector) { - if (llvm::ModelWithRegSequence() && NumVecs >= 2) { + if (NumVecs >= 2) { SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1292,7 +1285,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VST1 and VST2, // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - if (llvm::ModelWithRegSequence() && NumVecs == 2) { + if (NumVecs == 2) { // First extract the pair of Q registers. SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); @@ -1330,76 +1323,48 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - if (llvm::ModelWithRegSequence()) { - // Form the QQQQ REG_SEQUENCE. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3)); - V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); - - // Store the even D registers. 
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - Ops.push_back(Reg0); // post-access address offset - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, - RegVT, RegSeq)); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - // Store the odd D registers. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, - RegVT, RegSeq); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; - } else { - Ops.push_back(Reg0); // post-access address offset - - // Store the even subregs. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - - // Store the odd subregs. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; - } + // Form the QQQQ REG_SEQUENCE. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3)); + V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); + + // Store the even D registers. + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + Ops.push_back(Reg0); // post-access address offset + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, + RegVT, RegSeq)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. 
+ Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, + RegVT, RegSeq); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1421,13 +1386,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, // Quad registers are handled by load/store of subregs. Find the subreg info. unsigned NumElts = 0; - int SubregIdx = 0; bool Even = false; EVT RegVT = VT; if (!is64BitVector) { RegVT = GetNEONSubregVT(VT); NumElts = RegVT.getVectorNumElements(); - SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1; Even = Lane < NumElts; } @@ -1455,35 +1418,26 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = 0; if (is64BitVector) { Opc = DOpcodes[OpcodeIndex]; - if (llvm::ModelWithRegSequence()) { - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, - RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, - RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, - RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, - RegSeq)); + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq)); } else { // Check if this is loading the even or odd subreg of a Q register. if (Lane < NumElts) { @@ -1493,31 +1447,24 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, Opc = QOpcodes1[OpcodeIndex]; } - if (llvm::ModelWithRegSequence()) { - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); - } - - // Extract the subregs of the input vector. - unsigned SubIdx = Even ? 
ARM::dsub_0 : ARM::dsub_1; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, - RegSeq)); + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); } else { - // Extract the subregs of the input vector. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, - N->getOperand(Vec+3))); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); } + + // Extract the subregs of the input vector. + unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, + RegSeq)); } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); @@ -1531,76 +1478,97 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, ResTys.push_back(MVT::Other); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); - if (llvm::ModelWithRegSequence()) { - // Form a REG_SEQUENCE to force register allocation. - SDValue RegSeq; - if (is64BitVector) { - SDValue V0 = SDValue(VLdLn, 0); - SDValue V1 = SDValue(VLdLn, 1); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = SDValue(VLdLn, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLdLn, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + if (is64BitVector) { + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); } else { - // For 128-bit vectors, take the 64-bit results of the load and insert them - // as subregs into the result. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - if (Even) { - V[i] = SDValue(VLdLn, Vec); - V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - } else { - V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - V[i+1] = SDValue(VLdLn, Vec); - } + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLdLn, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + } else { + // For 128-bit vectors, take the 64-bit results of the load and insert + // them as subregs into the result. 
+ SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + if (Even) { + V[i] = SDValue(VLdLn, Vec); + V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + } else { + V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + V[i+1] = SDValue(VLdLn, Vec); } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - if (NumVecs == 2) - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); - else - RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); - return NULL; - } - - // For a 64-bit vector load to D registers, nothing more needs to be done. - if (is64BitVector) - return VLdLn; - - // For 128-bit vectors, take the 64-bit results of the load and insert them - // as subregs into the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, - N->getOperand(Vec+3), - SDValue(VLdLn, Vec)); - ReplaceUses(SDValue(N, Vec), QuadVec); + if (NumVecs == 2) + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); + else + RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); } - Chain = SDValue(VLdLn, NumVecs); - ReplaceUses(SDValue(N, NumVecs), Chain); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); return NULL; } +SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, + unsigned Opc) { + assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + unsigned FirstTblReg = IsExt ? 2 : 1; + + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + SDValue V0 = N->getOperand(FirstTblReg + 0); + SDValue V1 = N->getOperand(FirstTblReg + 1); + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + else { + SDValue V2 = N->getOperand(FirstTblReg + 2); + // If it's a vtbl3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(FirstTblReg + 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. 
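(For orientation, a scalar model of what VTBL/VTBX compute per byte lane; this is an illustrative sketch of the NEON semantics, not code from this patch:

  #include <stdint.h>

  // One result byte of a table lookup over TableBytes table entries.
  uint8_t TblLane(const uint8_t *Table, unsigned TableBytes,
                  uint8_t Index, uint8_t OldDest, bool IsExt) {
    if (Index < TableBytes)
      return Table[Index];       // in-range index reads the table
    return IsExt ? OldDest : 0;  // VTBX keeps the old byte, VTBL zeroes it
  }

The IsExt flag in SelectVTBL below corresponds to the VTBX variants, which is why those take the previous destination as an extra operand.)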
+ SmallVector<SDValue, 6> Ops; + if (IsExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq)); + + Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); + Ops.push_back(getAL(CurDAG)); // predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register + return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); +} + SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) @@ -1954,8 +1922,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { - SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5); + SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); @@ -2015,7 +1983,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4); + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), @@ -2029,7 +1997,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4); + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), @@ -2211,6 +2179,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } + case ARMISD::BUILD_VECTOR: { + EVT VecVT = N->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + unsigned NumElts = VecVT.getVectorNumElements(); + if (EltVT.getSimpleVT() == MVT::f64) { + assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); + return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); + } + assert(EltVT.getSimpleVT() == MVT::f32 && + "unexpected type for BUILD_VECTOR"); + if (NumElts == 2) + return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); + assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); + return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { @@ -2342,6 +2326,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + break; + + case Intrinsic::arm_neon_vtbl2: + return SelectVTBL(N, false, 2, ARM::VTBL2); + 
case Intrinsic::arm_neon_vtbl3: + return SelectVTBL(N, false, 3, ARM::VTBL3); + case Intrinsic::arm_neon_vtbl4: + return SelectVTBL(N, false, 4, ARM::VTBL4); + + case Intrinsic::arm_neon_vtbx2: + return SelectVTBL(N, true, 2, ARM::VTBX2); + case Intrinsic::arm_neon_vtbx3: + return SelectVTBL(N, true, 3, ARM::VTBX3); + case Intrinsic::arm_neon_vtbx4: + return SelectVTBL(N, true, 4, ARM::VTBX4); + } + break; + } + case ISD::CONCAT_VECTORS: return SelectConcatVector(N); } @@ -2367,9 +2374,3 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new ARMDAGToDAGISel(TM, OptLevel); } - -/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model -/// operations involving sub-registers. -bool llvm::ModelWithRegSequence() { - return UseRegSeq; -} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b8126a3..98d8b85 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" @@ -40,6 +41,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -47,9 +49,27 @@ #include <sstream> using namespace llvm; +STATISTIC(NumTailCalls, "Number of tail calls"); + +// This option should go away when tail calls fully work. +static cl::opt<bool> +EnableARMTailCalls("arm-tail-calls", cl::Hidden, + cl::desc("Generate tail calls (TEMPORARY OPTION)."), + cl::init(true)); + static cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, - cl::desc("Generate calls via indirect call instructions."), + cl::desc("Generate calls via indirect call instructions"), + cl::init(false)); + +static cl::opt<bool> +ARMInterworking("arm-interworking", cl::Hidden, + cl::desc("Enable / disable ARM interworking (for debugging only)"), + cl::init(true)); + +static cl::opt<bool> +EnableARMCodePlacement("arm-code-placement", cl::Hidden, + cl::desc("Enable code placement pass for ARM"), cl::init(false)); static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, @@ -94,10 +114,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, } setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - if (llvm::ModelWithRegSequence()) - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - else - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); @@ -393,13 +410,57 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - - // If the subtarget does not have extract instructions, sign_extend_inreg - // needs to be expanded. 
Extract is available in ARM mode on v6 and up, - // and on most Thumb2 implementations. - if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops()) - || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) { + // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise + // use the default expansion. + bool canHandleAtomics = + (Subtarget->hasV7Ops() || + (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())); + if (canHandleAtomics) { + // membarrier needs custom lowering; the rest are legal and handled + // normally. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + } else { + // Set them all for expansion, which will force libcalls. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + // Since the libcalls include locking, fold in the fences + setShouldFoldAtomicFences(true); + } + // 64-bit versions are always libcalls (for now) + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); + + // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. + if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -412,8 +473,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. 
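(One note on the block of ATOMIC_* Expand calls above: the per-width enumeration is mechanical, and the same configuration could be written as a loop. A sketch under that assumption, using the same ISD opcodes and llvm::array_lengthof; not how the patch itself is written:

  const unsigned AtomicOps[] = {
    ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP,     ISD::ATOMIC_LOAD_ADD,
    ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
    ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND
  };
  const MVT AtomicVTs[] = { MVT::i8, MVT::i16, MVT::i32 };
  // Force libcalls for every op/width combination at once.
  for (unsigned i = 0, e = array_lengthof(AtomicOps); i != e; ++i)
    for (unsigned j = 0, f = array_lengthof(AtomicVTs); j != f; ++j)
      setOperationAction(AtomicOps[i], AtomicVTs[j], Expand);
)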
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
-  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+  if (Subtarget->isTargetDarwin()) {
+    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+  }

   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
@@ -474,28 +537,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   else
     setSchedulingPreference(Sched::Hybrid);

-  // FIXME: If-converter should use instruction latency to determine
-  // profitability rather than relying on fixed limits.
-  if (Subtarget->getCPUString() == "generic") {
-    // Generic (and overly aggressive) if-conversion limits.
-    setIfCvtBlockSizeLimit(10);
-    setIfCvtDupBlockSizeLimit(2);
-  } else if (Subtarget->hasV7Ops()) {
-    setIfCvtBlockSizeLimit(3);
-    setIfCvtDupBlockSizeLimit(1);
-  } else if (Subtarget->hasV6Ops()) {
-    setIfCvtBlockSizeLimit(2);
-    setIfCvtDupBlockSizeLimit(1);
-  } else {
-    setIfCvtBlockSizeLimit(3);
-    setIfCvtDupBlockSizeLimit(2);
-  }
-
   maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
-  // Do not enable CodePlacementOpt for now: it currently runs after the
-  // ARMConstantIslandPass and messes up branch relaxation and placement
-  // of constant islands.
-  // benefitFromCodePlacementOpt = true;
+
+  // On ARM arguments smaller than 4 bytes are extended, so all arguments
+  // are at least 4 bytes aligned.
+  setMinStackArgumentAlignment(4);
+
+  if (EnableARMCodePlacement)
+    benefitFromCodePlacementOpt = true;
 }

 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -537,6 +586,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";

+  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
+
   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
@@ -581,6 +632,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VZIP:          return "ARMISD::VZIP";
   case ARMISD::VUZP:          return "ARMISD::VUZP";
   case ARMISD::VTRN:          return "ARMISD::VTRN";
+  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
   case ARMISD::FMAX:          return "ARMISD::FMAX";
   case ARMISD::FMIN:          return "ARMISD::FMIN";
   }
@@ -603,15 +655,33 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {

 /// getFunctionAlignment - Return the Log2 alignment of this function.
 unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
-  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
+  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
 }

 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
-  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+  unsigned NumVals = N->getNumValues();
+  if (!NumVals)
+    return Sched::RegPressure;
+
+  for (unsigned i = 0; i != NumVals; ++i) {
     EVT VT = N->getValueType(i);
     if (VT.isFloatingPoint() || VT.isVector())
       return Sched::Latency;
   }
+
+  if (!N->isMachineOpcode())
+    return Sched::RegPressure;
+
+  // Loads are scheduled for latency even if the instruction itinerary
+  // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.mayLoad()) + return Sched::Latency; + + const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData(); + if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2) + return Sched::Latency; return Sched::RegPressure; } @@ -964,11 +1034,28 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - // ARM target does not yet support tail call optimization. - isTailCall = false; + MachineFunction &MF = DAG.getMachineFunction(); + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool IsSibCall = false; + // Temporarily disable tail calls so things don't break. + if (!EnableARMTailCalls) + isTailCall = false; + if (isTailCall) { + // Check if it's really possible to do a tail call. + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + // We don't support GuaranteedTailCallOpt for ARM, only automatically + // detected sibcalls. + if (isTailCall) { + ++NumTailCalls; + IsSibCall = true; + } + } // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -981,9 +1068,14 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); + // For tail calls, memory operands are available in our caller's stack. + if (IsSibCall) + NumBytes = 0; + // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -996,7 +1088,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[realArgIdx].Val; + SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; // Promote the value if needed. @@ -1044,7 +1136,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { + } else if (!IsSibCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -1059,10 +1151,32 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); + // Tail call byval lowering might overwrite argument registers so in case of + // tail call optimization the copies to registers are lowered later. 
+  if (!isTailCall)
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                               RegsToPass[i].second, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+
+  // For tail calls lower the arguments to the 'real' stack slot.
+  if (isTailCall) {
+    // Force all the incoming stack arguments to be loaded from the stack
+    // before any new outgoing arguments are stored to the stack, because the
+    // outgoing stack slots may alias the incoming argument stack slots, and
+    // the alias isn't otherwise explicit. This is slightly more conservative
+    // than necessary, because it means that each store effectively depends
+    // on every argument instead of just those arguments it would clobber.
+
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+    InFlag = SDValue();
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                               RegsToPass[i].second, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+    InFlag = SDValue();
   }

   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1071,7 +1185,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   bool isDirect = false;
   bool isARMFunc = false;
   bool isLocalARMFunc = false;
-  MachineFunction &MF = DAG.getMachineFunction();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

   if (EnableARMLongCalls) {
@@ -1117,7 +1230,7 @@
       getTargetMachine().getRelocationModel() != Reloc::Static;
     isARMFunc = !Subtarget->isThumb() || isStub;
     // ARM call to a local ARM function is predicable.
-    isLocalARMFunc = !Subtarget->isThumb() && !isExt;
+    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
     // tBX takes a register source operand.
     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
@@ -1134,7 +1247,7 @@
       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                            getPointerTy(), Callee, PICLabel);
     } else
-      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     isDirect = true;
     bool isStub = Subtarget->isTargetDarwin() &&
@@ -1171,11 +1284,6 @@
       ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
       : ARMISD::CALL_NOLINK;
   }
-  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
-    // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK
-    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
-    InFlag = Chain.getValue(1);
-  }

   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
@@ -1189,9 +1297,13 @@
   if (InFlag.getNode())
     Ops.push_back(InFlag);
+
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  if (isTailCall)
+    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
   // Returns a chain and a flag for retval copy to use.
-  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
-                      &Ops[0], Ops.size());
+  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
   InFlag = Chain.getValue(1);

   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -1205,10 +1317,203 @@
                          dl, DAG, InVals);
 }

+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+                         const ARMInstrInfo *TII) {
+  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+  int FI = INT_MAX;
+  if (Arg.getOpcode() == ISD::CopyFromReg) {
+    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+      return false;
+    MachineInstr *Def = MRI->getVRegDef(VR);
+    if (!Def)
+      return false;
+    if (!Flags.isByVal()) {
+      if (!TII->isLoadFromStackSlot(Def, FI))
+        return false;
+    } else {
+      return false;
+    }
+  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+    if (Flags.isByVal())
+      // ByVal argument is passed in as a pointer but it's now being
+      // dereferenced. e.g.
+      // define @foo(%struct.X* %A) {
+      //   tail call @bar(%struct.X* byval %A)
+      // }
+      return false;
+    SDValue Ptr = Ld->getBasePtr();
+    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+    if (!FINode)
+      return false;
+    FI = FINode->getIndex();
+  } else
+    return false;
+
+  assert(FI != INT_MAX);
+  if (!MFI->isFixedObjectIndex(FI))
+    return false;
+  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+                                     CallingConv::ID CalleeCC,
+                                     bool isVarArg,
+                                     bool isCalleeStructRet,
+                                     bool isCallerStructRet,
+                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                     const SmallVectorImpl<SDValue> &OutVals,
+                                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                                     SelectionDAG& DAG) const {
+  const Function *CallerF = DAG.getMachineFunction().getFunction();
+  CallingConv::ID CallerCC = CallerF->getCallingConv();
+  bool CCMatch = CallerCC == CalleeCC;
+
+  // Look for obvious safe cases to perform tail call optimization that do not
+  // require ABI changes. This is what gcc calls sibcall.
+
+  // Do not sibcall optimize vararg calls unless the call site passes no
+  // arguments.
+  if (isVarArg && !Outs.empty())
+    return false;
+
+  // Also avoid sibcall optimization if either caller or callee uses struct
+  // return semantics.
+  if (isCalleeStructRet || isCallerStructRet)
+    return false;
+
+  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+  // emitEpilogue is not ready for them.
+  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+  // LR. This means if we need to reload LR, it takes an extra instruction,
+  // which outweighs the value of the tail call; but here we don't know yet
+  // whether LR is going to be used. Probably the right approach is to
+  // generate the tail call here and turn it back into CALL/RET in
+  // emitEpilogue if LR is used.
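(For orientation, the shape of call these eligibility checks are designed to accept; a hedged example, not taken from this patch: same calling convention, no struct return, no varargs, and every argument already in a register or a matching fixed stack slot.

  int callee(int a, int b, int c, int d);

  int caller(int a, int b, int c, int d) {
    // All four arguments travel in r0-r3 under AAPCS and line up with the
    // callee's, so LowerCall can emit ARMISD::TC_RETURN: pop the caller's
    // frame, then branch to callee instead of calling it.
    return callee(a, b, c, d);
  }
)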
+ if (Subtarget->isThumb1Only()) + return false; + + // For the moment, we can only do this to functions defined in this + // compilation, or to indirect calls. A Thumb B to an ARM function, + // or vice versa, is not easily fixed up in the linker unlike BL. + // (We could do this by loading the address of the callee into a register; + // that is an extra instruction over the direct call and burns a register + // as well, so is not likely to be a win.) + + // It might be safe to remove this restriction on non-Darwin. + + // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, + // but we need to make sure there are enough registers; the only valid + // registers are the 4 used for parameters. We don't currently do this + // case. + if (isa<ExternalSymbolSDNode>(Callee)) + return false; + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + if (GV->isDeclaration() || GV->isWeakForLinker()) + return false; + } + + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, getTargetMachine(), + RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, getTargetMachine(), + RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + + // If the callee takes no arguments then go on to check the results of the + // call. + if (!Outs.empty()) { + // Check if stack adjustment is needed. For now, do not do this if any + // argument is passed on the stack. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, + CCAssignFnForNode(CalleeCC, false, isVarArg)); + if (CCInfo.getNextStackOffset()) { + MachineFunction &MF = DAG.getMachineFunction(); + + // Check if the arguments are already laid out in the right way as + // the caller's fixed stack objects. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const ARMInstrInfo *TII = + ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = OutVals[realArgIdx]; + ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (VA.needsCustom()) { + // f64 and vector types are split into multiple registers or + // register/stack-slot combinations. The types will not match + // the registers; give up on memory f64 refs until we figure + // out what to do about this. 
+        if (!VA.isRegLoc())
+          return false;
+        if (!ArgLocs[++i].isRegLoc())
+          return false;
+        if (RegVT == MVT::v2f64) {
+          if (!ArgLocs[++i].isRegLoc())
+            return false;
+          if (!ArgLocs[++i].isRegLoc())
+            return false;
+        }
+      } else if (!VA.isRegLoc()) {
+        if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+                                 MFI, MRI, TII))
+          return false;
+      }
+    }
+  }
+
+  return true;
+}
+
 SDValue
 ARMTargetLowering::LowerReturn(SDValue Chain,
                                CallingConv::ID CallConv, bool isVarArg,
                                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               const SmallVectorImpl<SDValue> &OutVals,
                                DebugLoc dl, SelectionDAG &DAG) const {

   // CCValAssign - represent the assignment of the return value to a location.
@@ -1239,7 +1544,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");

-    SDValue Arg = Outs[realRVLocIdx].Val;
+    SDValue Arg = OutVals[realRVLocIdx];

     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
@@ -1477,7 +1782,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   // pair. This is always cheaper.
   if (Subtarget->useMovt()) {
     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
-                       DAG.getTargetGlobalAddress(GV, PtrVT));
+                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   } else {
     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1552,9 +1857,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
 SDValue
 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
-  SDValue Val = Subtarget->isThumb() ?
-    DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
-    DAG.getConstant(0, MVT::i32);
+  SDValue Val = DAG.getConstant(0, MVT::i32);
   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
                      Op.getOperand(1), Val);
 }
@@ -1568,8 +1871,7 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {

 SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
-                                           const ARMSubtarget *Subtarget)
-                                             const {
+                                           const ARMSubtarget *Subtarget) const {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   DebugLoc dl = Op.getDebugLoc();
   switch (IntNo) {
@@ -1597,7 +1899,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                   PseudoSourceValue::getConstantPool(), 0,
                   false, false, 0);
-    SDValue Chain = Result.getValue(1);

     if (RelocM == Reloc::PIC_) {
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1609,25 +1910,21 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
 }

 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
-                          const ARMSubtarget *Subtarget) {
+                               const ARMSubtarget *Subtarget) {
   DebugLoc dl = Op.getDebugLoc();
   SDValue Op5 = Op.getOperand(5);
-  SDValue Res;
   unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
-  if (isDeviceBarrier) {
-    if (Subtarget->hasV7Ops())
-      Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
-    else
-      Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
-                        DAG.getConstant(0, MVT::i32));
-  } else {
-    if (Subtarget->hasV7Ops())
-      Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-    else
-      Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
-                        DAG.getConstant(0, MVT::i32));
-  }
-  return Res;
+  // v6 and v7 can both handle barriers directly, but need to be handled a bit
+ // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should + // never get here. + unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; + if (Subtarget->hasV7Ops()) + return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); + else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) + return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + return SDValue(); } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -1712,7 +2009,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ArgValue2; if (NextVA.isMemLoc()) { MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); + int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1768,8 +2065,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, VA = ArgLocs[++i]; // skip ahead to next loc SDValue ArgValue2; if (VA.isMemLoc()) { - int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, @@ -1836,8 +2132,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1868,7 +2163,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, AFI->setVarArgsFrameIndex( MFI->CreateFixedObject(VARegSaveSize, ArgOffset + VARegSaveSize - VARegSize, - true, false)); + true)); SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy()); @@ -1884,8 +2179,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, - false, false, 0); + PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), + 0, false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1895,8 +2190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. 
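(To summarize the LowerMEMBARRIER rewrite above: both barrier kinds produce one of the same two node shapes, and only the subtarget decides the form. On v7 the barrier is a dedicated instruction, so the node carries just the chain; on v6 it is the CP15-based barrier, whose pattern expects an extra zero operand. A sketch of the two shapes, assuming that operand layout:

  // v7: barrier instruction, chain only (ARMISD::SYNCBARRIER for the
  // device-barrier flavor).
  SDValue V7Bar = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Chain);
  // v6 (non-Thumb1): extra zero immediate for the MCR-based pattern.
  SDValue V6Bar = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Chain,
                              DAG.getConstant(0, MVT::i32));
)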
-    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
-                                                     true, false));
+    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
   }

   return Chain;
@@ -1978,9 +2272,44 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
 }

+static bool canBitcastToInt(SDNode *Op) {
+  return Op->hasOneUse() &&
+         ISD::isNormalLoad(Op) &&
+         Op->getValueType(0) == MVT::f32;
+}
+
+static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                       Ld->getChain(), Ld->getBasePtr(),
+                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                       Ld->isVolatile(), Ld->isNonTemporal(),
+                       Ld->getAlignment());
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
-static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
-                         DebugLoc dl) {
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
+                             SDValue &ARMCC, SelectionDAG &DAG,
+                             DebugLoc dl) const {
+  if (UnsafeFPMath && FiniteOnlyFPMath() &&
+      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+       CC == ISD::SETNE || CC == ISD::SETUNE) &&
+      canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
+    // If unsafe fp math optimization is enabled and there are no other uses of
+    // the CMP operands, and the condition code is EQ or NE, we can optimize it
+    // to an integer comparison.
+    if (CC == ISD::SETOEQ)
+      CC = ISD::SETEQ;
+    else if (CC == ISD::SETUNE)
+      CC = ISD::SETNE;
+    LHS = bitcastToInt(LHS, DAG);
+    RHS = bitcastToInt(RHS, DAG);
+    return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+  }
+
   SDValue Cmp;
   if (!isFloatingPointZero(RHS))
     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
@@ -2010,13 +2339,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {

     SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+    SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
     SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
-                                 ARMCC, CCR, Cmp);
+                                 ARMCC, CCR, Cmp);
     if (CondCode2 != ARMCC::AL) {
       SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
       // FIXME: Needs another CMP because flag can have but one use.
-      SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+      SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
       Result = DAG.getNode(ARMISD::CMOV, dl, VT,
                            Result, TrueVal, ARMCC2, CCR, Cmp2);
     }
@@ -2043,8 +2372,8 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {

   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
-  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
   SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
@@ -2132,7 +2461,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   return DAG.getNode(Opc, dl, VT, Op);
 }

-static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   // Implement fcopysign with a fabs and a conditional fneg.
   SDValue Tmp0 = Op.getOperand(0);
   SDValue Tmp1 = Op.getOperand(1);
@@ -2140,8 +2469,10 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-  SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
   SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+  SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
+  SDValue Cmp = getVFPCmp(Tmp1, FP0,
+                          ISD::SETLT, ARMCC, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
 }
@@ -2206,7 +2537,8 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
                              DAG.getConstant(0, MVT::i32));
     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                              DAG.getConstant(1, MVT::i32));
-    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
+                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
   }

   // Turn f64->i64 into VMOVRRD.
@@ -2516,76 +2848,149 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   return Result;
 }

-/// isVMOVSplat - Check if the specified splat value corresponds to an immediate
-/// VMOV instruction, and if so, return the constant being splatted.
-static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
-                           unsigned SplatBitSize, SelectionDAG &DAG) {
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return either the constant being
+/// splatted or the encoded value, depending on the DoEncode parameter.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+                                 unsigned SplatBitSize, SelectionDAG &DAG,
+                                 bool isVMOV, bool DoEncode) {
+  unsigned OpCmode, Imm;
+  EVT VT;
+
+  // SplatBitSize is set to the smallest size that splats the vector, so a
+  // zero vector will always have SplatBitSize == 8. However, NEON modified
+  // immediate instructions other than VMOV do not support the 8-bit encoding
+  // of a zero vector, and the default encoding of zero is supposed to be the
+  // 32-bit version.
+  if (SplatBits == 0)
+    SplatBitSize = 32;
+
   switch (SplatBitSize) {
   case 8:
-    // Any 1-byte value is OK.
+    // Any 1-byte value is OK. Op=0, Cmode=1110.
     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
-    return DAG.getTargetConstant(SplatBits, MVT::i8);
+    OpCmode = 0xe;
+    Imm = SplatBits;
+    VT = MVT::i8;
+    break;

   case 16:
     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
-    if ((SplatBits & ~0xff) == 0 ||
-        (SplatBits & ~0xff00) == 0)
-      return DAG.getTargetConstant(SplatBits, MVT::i16);
-    break;
+    VT = MVT::i16;
+    if ((SplatBits & ~0xff) == 0) {
+      // Value = 0x00nn: Op=x, Cmode=100x.
+      OpCmode = 0x8;
+      Imm = SplatBits;
+      break;
+    }
+    if ((SplatBits & ~0xff00) == 0) {
+      // Value = 0xnn00: Op=x, Cmode=101x.
+      OpCmode = 0xa;
+      Imm = SplatBits >> 8;
+      break;
+    }
+    return SDValue();

   case 32:
     // NEON's 32-bit VMOV supports splat values where:
     // * only one byte is nonzero, or
     // * the least significant byte is 0xff and the second byte is nonzero, or
     // * the least significant 2 bytes are 0xff and the third is nonzero.
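(The (OpCmode, Imm) pairs chosen in the cases above and below are ultimately packed into one integer. Assuming the ARM_AM::createNEONModImm helper used later in this patch keeps the Op/Cmode bits just above the 8-bit immediate, the packing amounts to the following sketch with a hypothetical name:

  // Assumed layout: bits [12:8] = Op/Cmode, bits [7:0] = immediate byte.
  static inline unsigned createNEONModImmSketch(unsigned OpCmode,
                                                unsigned Val) {
    return (OpCmode << 8) | (Val & 0xff);
  }
  // e.g. a v4i32 splat of 0x0000nn00 encodes as OpCmode=0x2, Imm=0xnn.
)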
- if ((SplatBits & ~0xff) == 0 || - (SplatBits & ~0xff00) == 0 || - (SplatBits & ~0xff0000) == 0 || - (SplatBits & ~0xff000000) == 0) - return DAG.getTargetConstant(SplatBits, MVT::i32); + VT = MVT::i32; + if ((SplatBits & ~0xff) == 0) { + // Value = 0x000000nn: Op=x, Cmode=000x. + OpCmode = 0; + Imm = SplatBits; + break; + } + if ((SplatBits & ~0xff00) == 0) { + // Value = 0x0000nn00: Op=x, Cmode=001x. + OpCmode = 0x2; + Imm = SplatBits >> 8; + break; + } + if ((SplatBits & ~0xff0000) == 0) { + // Value = 0x00nn0000: Op=x, Cmode=010x. + OpCmode = 0x4; + Imm = SplatBits >> 16; + break; + } + if ((SplatBits & ~0xff000000) == 0) { + // Value = 0xnn000000: Op=x, Cmode=011x. + OpCmode = 0x6; + Imm = SplatBits >> 24; + break; + } if ((SplatBits & ~0xffff) == 0 && - ((SplatBits | SplatUndef) & 0xff) == 0xff) - return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); + ((SplatBits | SplatUndef) & 0xff) == 0xff) { + // Value = 0x0000nnff: Op=x, Cmode=1100. + OpCmode = 0xc; + Imm = SplatBits >> 8; + SplatBits |= 0xff; + break; + } if ((SplatBits & ~0xffffff) == 0 && - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) - return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); + ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { + // Value = 0x00nnffff: Op=x, Cmode=1101. + OpCmode = 0xd; + Imm = SplatBits >> 16; + SplatBits |= 0xffff; + break; + } // Note: there are a few 32-bit splat values (specifically: 00ffff00, // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not // VMOV.I32. A (very) minor optimization would be to replicate the value // and fall through here to test for a valid 64-bit splat. But, then the // caller would also need to check and handle the change in size. - break; + return SDValue(); case 64: { // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. + if (!isVMOV) + return SDValue(); uint64_t BitMask = 0xff; uint64_t Val = 0; + unsigned ImmMask = 1; + Imm = 0; for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { - if (((SplatBits | SplatUndef) & BitMask) == BitMask) + if (((SplatBits | SplatUndef) & BitMask) == BitMask) { Val |= BitMask; - else if ((SplatBits & BitMask) != 0) + Imm |= ImmMask; + } else if ((SplatBits & BitMask) != 0) { return SDValue(); + } BitMask <<= 8; + ImmMask <<= 1; } - return DAG.getTargetConstant(Val, MVT::i64); + // Op=1, Cmode=1110. + OpCmode = 0x1e; + SplatBits = Val; + VT = MVT::i64; + break; } default: - llvm_unreachable("unexpected size for isVMOVSplat"); - break; + llvm_unreachable("unexpected size for isNEONModifiedImm"); + return SDValue(); } - return SDValue(); + if (DoEncode) { + unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); + return DAG.getTargetConstant(EncodedVal, MVT::i32); + } + return DAG.getTargetConstant(SplatBits, VT); } -/// getVMOVImm - If this is a build_vector of constants which can be -/// formed by using a VMOV instruction of the specified element size, -/// return the constant being splatted. The ByteSize field indicates the -/// number of bytes of each element [1248]. -SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { +/// getNEONModImm - If this is a valid vector constant for a NEON instruction +/// with a "modified immediate" operand (e.g., VMOV) of the specified element +/// size, return the encoded value for that immediate. The ByteSize field +/// indicates the number of bytes of each element [1248]. 
+SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, + SelectionDAG &DAG) { BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); APInt SplatBits, SplatUndef; unsigned SplatBitSize; @@ -2597,8 +3002,8 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (SplatBitSize > ByteSize * 8) return SDValue(); - return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG); + return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), + SplatBitSize, DAG, isVMOV, true); } static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, @@ -2838,8 +3243,10 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { - SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, DAG); + // Check if an immediate VMOV works. + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), + SplatBitSize, DAG, true, false); if (Val.getNode()) return BuildSplat(Val, VT, DAG, dl); } @@ -2883,21 +3290,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::VDUP, dl, VT, Value); // Vectors with 32- or 64-bit elements can be built by directly assigning - // the subregisters. + // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands + // will be legalized. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); - SDValue Val = DAG.getUNDEF(VecVT); - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) - continue; - Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt); - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt, - DAG.getConstant(i, MVT::i32)); - } + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } @@ -2934,7 +3337,9 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, bool ReverseVEXT; unsigned Imm, WhichResult; - return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + return (EltSize >= 32 || + ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || @@ -3032,59 +3437,62 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of the same time so that they get CSEd properly. SVN->getMask(ShuffleMask); - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) Lane = 0; - - if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize <= 32) { + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. 
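(A concrete instance of the splat path here, illustrative only: a v4i32 shuffle mask of <1,1,1,1> is a splat of lane 1 and lowers to a single node, matching the code that follows; an all-undef mask is treated as a splat of lane 0.

  // <1,1,1,1> on a v4i32 input: duplicate lane 1 across the vector.
  SDValue Dup = DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
                            DAG.getConstant(1, MVT::i32));
)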
+ if (Lane == -1) Lane = 0; + + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i32)); } - return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, - DAG.getConstant(Lane, MVT::i32)); - } - bool ReverseVEXT; - unsigned Imm; - if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { - if (ReverseVEXT) - std::swap(V1, V2); - return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, - DAG.getConstant(Imm, MVT::i32)); - } - - if (isVREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARMISD::VREV64, dl, VT, V1); - if (isVREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARMISD::VREV32, dl, VT, V1); - if (isVREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARMISD::VREV16, dl, VT, V1); - - // Check for Neon shuffles that modify both input vectors in place. - // If both results are used, i.e., if there are two shuffles with the same - // source operands and with masks corresponding to both results of one of - // these operations, DAG memoization will ensure that a single node is - // used for both shuffles. - unsigned WhichResult; - if (isVTRNMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); - if (isVUZPMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); - if (isVZIPMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); + bool ReverseVEXT; + unsigned Imm; + if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { + if (ReverseVEXT) + std::swap(V1, V2); + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, + DAG.getConstant(Imm, MVT::i32)); + } - if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); - if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); - if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); + if (isVREVMask(ShuffleMask, VT, 64)) + return DAG.getNode(ARMISD::VREV64, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 32)) + return DAG.getNode(ARMISD::VREV32, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 16)) + return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + + // Check for Neon shuffles that modify both input vectors in place. + // If both results are used, i.e., if there are two shuffles with the same + // source operands and with masks corresponding to both results of one of + // these operations, DAG memoization will ensure that a single node is + // used for both shuffles. 
+ unsigned WhichResult; + if (isVTRNMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVUZPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVZIPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + + if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + } // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. @@ -3108,8 +3516,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } - // Implement shuffles with 32- or 64-bit elements as subreg copies. - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. @@ -3117,17 +3524,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); - SDValue Val = DAG.getUNDEF(VecVT); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) { if (ShuffleMask[i] < 0) - continue; - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - ShuffleMask[i] < (int)NumElts ? V1 : V2, - DAG.getConstant(ShuffleMask[i] & (NumElts-1), - MVT::i32)); - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, - Elt, DAG.getConstant(i, MVT::i32)); + Ops.push_back(DAG.getUNDEF(EltVT)); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + ShuffleMask[i] < (int)NumElts ? V1 : V2, + DAG.getConstant(ShuffleMask[i] & (NumElts-1), + MVT::i32))); } + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } @@ -3277,7 +3684,12 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... @@ -3315,7 +3727,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // ... BB = exitMBB; - MF->DeleteMachineInstr(MI); // The instruction is gone now. + MI->eraseFromParent(); // The instruction is gone now. 
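Every custom inserter updated here follows the same three-step shape: create the new blocks, move the tail of the current block and its CFG edges across, then erase the pseudo. A sketch of that shape against the LLVM APIs of this vintage; the helper name splitBlockAfter and the contMBB block are illustrative, not from the tree:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Split BB after the pseudo MI: instructions following MI move into a fresh
// continuation block, which also inherits BB's successors (with PHIs in
// those successors rewritten to name the new block).
static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
                                          MachineBasicBlock *BB) {
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  MachineBasicBlock *contMBB =
    MF->CreateMachineBasicBlock(BB->getBasicBlock());
  MF->insert(It, contMBB);

  contMBB->splice(contMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
  contMBB->transferSuccessorsAndUpdatePHIs(BB);

  // The caller wires its expansion blocks in between BB and contMBB, then
  // calls MI->eraseFromParent() once the replacement code is emitted.
  return contMBB;
}

The old transferSuccessors call moved only the edges; the splice also carries over any instructions after the pseudo, and the AndUpdatePHIs variant keeps PHI operands in the successors consistent with the new predecessor.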
return BB; } @@ -3358,7 +3770,12 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(It, loopMBB); MF->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); @@ -3403,7 +3820,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // ... BB = exitMBB; - MF->DeleteMachineInstr(MI); // The instruction is gone now. + MI->eraseFromParent(); // The instruction is gone now. return BB; } @@ -3488,22 +3905,21 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) - .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -3516,11 +3932,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII->get(ARM::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -3541,7 +3958,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ? 
ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; - BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) + BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) .addReg(SrcReg, getKillRegState(SrcIsKill)); } @@ -3573,7 +3990,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, NeedPred = true; NeedCC = true; NeedOp3 = true; break; } - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); if (OpOpc == ARM::tAND) AddDefaultT1CC(MIB); MIB.addReg(ARM::SP); @@ -3589,10 +4006,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; - BuildMI(BB, dl, TII->get(CopyOpc)) + BuildMI(*BB, MI, dl, TII->get(CopyOpc)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(ARM::SP); - MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } } @@ -3893,7 +4310,8 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // Narrowing shifts require an immediate right shift. if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) break; - llvm_unreachable("invalid shift count for narrowing vector shift intrinsic"); + llvm_unreachable("invalid shift count for narrowing vector shift " + "intrinsic"); default: llvm_unreachable("unhandled vector shift"); @@ -4156,14 +4574,13 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { if (!Subtarget->hasV6Ops()) // Pre-v6 does not support unaligned mem access. return false; - else { - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - // FIXME: This is pretty conservative. Should we provide cmdline option to - // control the behaviour? - if (!Subtarget->isTargetDarwin()) - return false; - } + + // v6+ may or may not support unaligned mem access depending on the system + // configuration. + // FIXME: This is pretty conservative. Should we provide cmdline option to + // control the behaviour? + if (!Subtarget->isTargetDarwin()) + return false; switch (VT.getSimpleVT().SimpleTy) { default: @@ -4619,7 +5036,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } } if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(0U, ARM::CCRRegisterClass); + return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } @@ -4669,7 +5086,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Constraint, - bool hasMemory, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); @@ -4818,8 +5234,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Result); return; } - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, - Ops, DAG); + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } bool diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9c7517c..3a38669 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -70,6 +70,8 @@ namespace llvm { EH_SJLJ_SETJMP, // SjLj exception handling setjmp. 
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + TC_RETURN, // Tail call return pseudo. + THREAD_POINTER, DYN_ALLOC, // Dynamic allocation on the stack. @@ -133,6 +135,13 @@ namespace llvm { VUZP, // unzip (deinterleave) VTRN, // transpose + // Operands of the standard BUILD_VECTOR node are not legalized, which + // is fine if BUILD_VECTORs are always lowered to shuffles or other + // operations, but for ARM some BUILD_VECTORs are legal as-is and their + // operands need to be legalized. Define an ARM-specific version of + // BUILD_VECTOR for this purpose. + BUILD_VECTOR, + // Floating-point max and min: FMAX, FMIN @@ -141,11 +150,12 @@ namespace llvm { /// Define some predicates that are used for node matching. namespace ARM { - /// getVMOVImm - If this is a build_vector of constants which can be - /// formed by using a VMOV instruction of the specified element size, - /// return the constant being splatted. The ByteSize field indicates the - /// number of bytes of each element [1248]. - SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + /// getNEONModImm - If this is a valid vector constant for a NEON + /// instruction with a "modified immediate" operand (e.g., VMOV) of the + /// specified element size, return the encoded value for that immediate. + /// The ByteSize field indicates the number of bytes of each element [1248]. + SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, + SelectionDAG &DAG); /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) @@ -189,9 +199,9 @@ namespace llvm { bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal - /// icmp immediate, that is the target has icmp instructions which can compare - /// a register against the immediate without having to materialize the - /// immediate into a register. + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. virtual bool isLegalICmpImmediate(int64_t Imm) const; /// getPreIndexedAddressParts - returns true by value, base pointer and @@ -232,7 +242,6 @@ namespace llvm { /// being processed is 'm'. 
virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -282,7 +291,8 @@ namespace llvm { SDValue &Root, SelectionDAG &DAG, DebugLoc dl) const; - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; + CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, + bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, @@ -303,6 +313,7 @@ namespace llvm { SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; @@ -327,18 +338,34 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; + SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index d487df1..ac568e7 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -50,27 +50,23 @@ def VFPLdStMulFrm : Format<22>; def VFPMiscFrm : Format<23>; def ThumbFrm : Format<24>; - -def NEONFrm : Format<25>; -def NEONGetLnFrm : Format<26>; -def NEONSetLnFrm : Format<27>; -def NEONDupFrm : Format<28>; - -def MiscFrm : Format<29>; -def ThumbMiscFrm : Format<30>; - -def NLdStFrm : Format<31>; -def N1RegModImmFrm : Format<32>; -def N2RegFrm : Format<33>; -def NVCVTFrm : Format<34>; -def NVDupLnFrm : Format<35>; -def N2RegVShLFrm : Format<36>; -def N2RegVShRFrm : Format<37>; -def N3RegFrm : Format<38>; -def N3RegVShFrm : Format<39>; -def NVExtFrm : Format<40>; -def NVMulSLFrm : Format<41>; -def NVTBLFrm : Format<42>; +def MiscFrm : Format<25>; + +def NGetLnFrm : Format<26>; +def NSetLnFrm : Format<27>; +def NDupFrm : Format<28>; +def NLdStFrm : Format<29>; +def N1RegModImmFrm: Format<30>; +def N2RegFrm : Format<31>; +def NVCVTFrm : Format<32>; +def NVDupLnFrm : Format<33>; +def N2RegVShLFrm : Format<34>; +def N2RegVShRFrm : Format<35>; +def N3RegFrm : Format<36>; 
+def N3RegVShFrm : Format<37>; +def NVExtFrm : Format<38>; +def NVMulSLFrm : Format<39>; +def NVTBLFrm : Format<40>; // Misc flags. @@ -1653,17 +1649,17 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin, opc, dt, asm, pattern>; class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin, opc, dt, asm, pattern>; class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin, opc, dt, asm, pattern>; // Vector Duplicate Lane (from scalar to all elements) diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 85f6b40..ba228ff 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -63,7 +63,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { void ARMInstrInfo:: reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo &TRI) const { DebugLoc dl = Orig->getDebugLoc(); unsigned Opcode = Orig->getOpcode(); switch (Opcode) { diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index d4199d1..4563ffe 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -35,7 +35,7 @@ public: void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f3156d9..c73e204 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -53,6 +53,8 @@ def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -117,6 +119,9 @@ def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; +def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// @@ -858,13 +863,13 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, IIC_iALUi, "adr$p\t$dst, #$label", []>; +} // neverHasSideEffects def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), Pseudo, IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []> { let Inst{25} = 1; } -} // neverHasSideEffects //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -1026,6 +1031,74 @@ let isCall = 1, } } +// Tail calls. + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + // Darwin versions. + let Defs = [R0, R1, R2, R3, R9, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>; + + def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>; + + def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]>; + + def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]>; + + def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; + } + } + + // Non-Darwin versions (the difference is R9). + let Defs = [R0, R1, R2, R3, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>; + + def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>; + + def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsARM, IsNotDarwin]>; + + def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsThumb, IsNotDarwin]>; + + def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsNotDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; + } + } +} + let isBranch = 1, isTerminator = 1 in { // B is "predicable" since it can be xformed into a Bcc. let isBarrier = 1 in { @@ -1397,6 +1470,14 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, let Inst{25} = 0; } +// A version for the smaller set of tail call registers. 
+let neverHasSideEffects = 1 in +def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm, + IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP { + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; +} + def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { @@ -2530,31 +2611,30 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ] in { + D31 ], hasSideEffects = 1, isBarrier = 1 in { def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" - "add\t$val, pc, #8\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], + hasSideEffects = 1, isBarrier = 1 in { def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" - "add\t$val, pc, #8\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } @@ -2621,6 +2701,24 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // TODO: add,sub,and, 3-instr forms? 
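The eh_sjlj_setjmp expansions above implement the usual returns-twice contract: they store the address of the trailing "mov r0, #1" into word 1 of the buffer (reading pc in ARM mode yields the current instruction plus 8, hence the #8; the Thumb variants use adds #7 to reach the same spot with the Thumb bit set), return 0 on the direct path, and let a later longjmp land on that final mov so the same call appears to return 1. The observable behavior is that of the GCC-style builtins, which clang maps to these intrinsics. A runnable illustration, assuming only the documented five-word builtin jump buffer:

#include <cstdio>

// __builtin_setjmp requires a five-word buffer; word 1 receives the resume
// address, which is exactly what the expansions above compute from pc.
static void *buf[5];

static void bounce() {
  __builtin_longjmp(buf, 1); // the value passed must be 1
}

int main() {
  if (__builtin_setjmp(buf) == 0) {
    puts("direct return (r0 == 0)");
    bounce(); // does not return; control reappears at the setjmp
  }
  puts("resumed via longjmp (r0 == 1)");
  return 0;
}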
+// Tail calls +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; // Direct calls def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 197ec16..a84315f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -98,17 +98,8 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; // NEON operand definitions //===----------------------------------------------------------------------===// -def h8imm : Operand<i8> { - let PrintMethod = "printHex8ImmOperand"; -} -def h16imm : Operand<i16> { - let PrintMethod = "printHex16ImmOperand"; -} -def h32imm : Operand<i32> { - let PrintMethod = "printHex32ImmOperand"; -} -def h64imm : Operand<i64> { - let PrintMethod = "printHex64ImmOperand"; +def nModImm : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; } //===----------------------------------------------------------------------===// @@ -812,11 +803,6 @@ def DSubReg_f64_reg : SDNodeXForm<imm, [{ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; -def DSubReg_f64_other_reg : SDNodeXForm<imm, [{ - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()), - MVT::i32); -}]>; // Extract S sub-registers of Q/D registers. def SSubReg_f32_reg : SDNodeXForm<imm, [{ @@ -2282,7 +2268,7 @@ def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; // For disassembly only. defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$dst, $src, #0">; + "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -2834,73 +2820,70 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. def VMOV_get_imm8 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 1, *CurDAG); + return ARM::getNEONModImm(N, 1, true, *CurDAG); }]>; def vmovImm8 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0; }], VMOV_get_imm8>; // VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm. def VMOV_get_imm16 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 2, *CurDAG); + return ARM::getNEONModImm(N, 2, true, *CurDAG); }]>; def vmovImm16 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0; }], VMOV_get_imm16>; // VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm. 
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 4, *CurDAG); + return ARM::getNEONModImm(N, 4, true, *CurDAG); }]>; def vmovImm32 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0; }], VMOV_get_imm32>; // VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm. def VMOV_get_imm64 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 8, *CurDAG); + return ARM::getNEONModImm(N, 8, true, *CurDAG); }]>; def vmovImm64 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0; }], VMOV_get_imm64>; -// Note: Some of the cmode bits in the following VMOV instructions need to -// be encoded based on the immed values. - let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; -def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; -def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; } // isReMaterializable @@ -3122,17 +3105,6 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; -def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (i64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - (DSubReg_f64_other_reg imm:$lane))>; -def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (f64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - 
(DSubReg_f64_other_reg imm:$lane))>; - // VMOVN : Vector Narrowing Move defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn", "i", int_arm_neon_vmovn>; @@ -3319,22 +3291,16 @@ let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 - DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 - DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, - DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>; } // hasExtraSrcRegAllocReq = 1 // VTBX : Vector Table Extension @@ -3348,23 +3314,18 @@ let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 - DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; + "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; + "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", + "$orig = $dst", []>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4, "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", - "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; + "$orig = $dst", []>; } // hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 40f924b..bc0790d 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -894,11 +894,11 @@ def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p\t$dst, #$label", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 +} // neverHasSideEffects def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 -} // neverHasSideEffects //===----------------------------------------------------------------------===// // TLS Instructions @@ -923,18 +923,18 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. 
-// The current SP is passed in $val, and we reuse the reg as a scratch.
+// $val is a scratch register for our use.
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1,
+ isBarrier = 1 in {
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
}
@@ -1037,7 +1037,8 @@ def : T1Pat<(i32 imm0_255_comp:$src),
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
+ "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index b91c089..4692f2a 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -637,8 +637,7 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode GPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -649,8 +648,7 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -661,8 +659,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// SXTB16 and UXTB16 do not need the .w qualifier.
-multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
+// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>,
@@ -689,9 +687,9 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// DO variant - disassembly only, no pattern
-
-multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
+// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, "\t$dst, $src", []> { let Inst{31-27} = 0b11111; @@ -787,6 +785,7 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } +} // neverHasSideEffects def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p.w\t$dst, #${label}_${id}", []> { @@ -798,7 +797,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } -} // neverHasSideEffects // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -1330,7 +1328,7 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; defm t2SXTH : T2I_unary_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">; +defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">; defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; @@ -1347,13 +1345,13 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 24)>; + (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 8)>; + (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -2389,37 +2387,36 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. -// The current SP is passed in $val, and we reuse the reg as a scratch. +// $val is a scratch register for our use. 
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ] in { + D31 ], hasSideEffects = 1, isBarrier = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" - "\tmov\t$val, pc\n" - "\tadds\t$val, #7\n" - "\tstr\t$val, [$src, #4]\n" - "\tmovs\tr0, #0\n" - "\tb\t1f\n" - "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" + "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" + "adds\t$val, #7\n\t" + "str\t$val, [$src, #4]\n\t" + "movs\tr0, #0\n\t" + "b\t1f\n\t" + "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], + hasSideEffects = 1, isBarrier = 1 in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" - "\tmov\t$val, pc\n" - "\tadds\t$val, #7\n" - "\tstr\t$val, [$src, #4]\n" - "\tmovs\tr0, #0\n" - "\tb\t1f\n" - "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" + "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" + "adds\t$val, #7\n\t" + "str\t$val, [$src, #4]\n\t" + "movs\tr0, #0\n\t" + "b\t1f\n\t" + "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; @@ -2529,6 +2526,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // IT block +let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, Size2Bytes, IIC_iALUx, "it$mask\t$cc", "", []> { @@ -2691,7 +2689,8 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, + "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 54474cf..84c23e1 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -255,25 +255,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. 
-def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; -def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index ff332b7..f5d9eff 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -143,7 +143,8 @@ namespace llvm { JumpTableId2AddrMap[JTI] = Addr; } - /// getPCLabelAddr - Retrieve the address of the PC label of the specified id. + /// getPCLabelAddr - Retrieve the address of the PC label of the + /// specified id. intptr_t getPCLabelAddr(unsigned Id) const { DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id); assert(I != PCLabelMap.end()); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8585c1e..f80e316 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -74,11 +74,14 @@ namespace { private: struct MemOpQueueEntry { int Offset; + unsigned Reg; + bool isKill; unsigned Position; MachineBasicBlock::iterator MBBI; bool Merged; - MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) - : Offset(o), Position(p), MBBI(i), Merged(false) {} + MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, + MachineBasicBlock::iterator i) + : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -128,30 +131,30 @@ namespace { static int getLoadStoreMultipleOpcode(int Opcode) { switch (Opcode) { case ARM::LDR: - NumLDMGened++; + ++NumLDMGened; return ARM::LDM; case ARM::STR: - NumSTMGened++; + ++NumSTMGened; return ARM::STM; case ARM::t2LDRi8: case ARM::t2LDRi12: - NumLDMGened++; + ++NumLDMGened; return ARM::t2LDM; case ARM::t2STRi8: case ARM::t2STRi12: - NumSTMGened++; + ++NumSTMGened; return ARM::t2STM; case ARM::VLDRS: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMS; case ARM::VSTRS: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMS; case ARM::VLDRD: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMD; case ARM::VSTRD: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMD; default: llvm_unreachable("Unhandled opcode!"); } @@ -264,45 +267,59 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on // success. 
-void ARMLoadStoreOpt:: -MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &memOps, - unsigned memOpsBegin, - unsigned memOpsEnd, - unsigned insertAfter, - int Offset, - unsigned Base, - bool BaseKill, - int Opcode, - ARMCC::CondCodes Pred, - unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVector<MachineBasicBlock::iterator, 4> &Merges) { +void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, + MemOpQueue &memOps, + unsigned memOpsBegin, unsigned memOpsEnd, + unsigned insertAfter, int Offset, + unsigned Base, bool BaseKill, + int Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, + DebugLoc dl, + SmallVector<MachineBasicBlock::iterator, 4> &Merges) { // First calculate which of the registers should be killed by the merged // instruction. - SmallVector<std::pair<unsigned, bool>, 8> Regs; const unsigned insertPos = memOps[insertAfter].Position; + + SmallSet<unsigned, 4> UnavailRegs; + SmallSet<unsigned, 4> KilledRegs; + DenseMap<unsigned, unsigned> Killer; + for (unsigned i = 0; i < memOpsBegin; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + if (memOps[i].Merged) + UnavailRegs.insert(Reg); + else { + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + } + for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + + SmallVector<std::pair<unsigned, bool>, 8> Regs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - const MachineOperand &MO = memOps[i].MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - bool isKill = MO.isKill(); + unsigned Reg = memOps[i].Reg; + if (UnavailRegs.count(Reg)) + // Register is killed before and it's not easy / possible to update the + // kill marker on already merged instructions. Abort. + return; // If we are inserting the merged operation after an unmerged operation that // uses the same register, make sure to transfer any kill flag. - for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j) - if (memOps[j].Position<insertPos) { - const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); - if (MOJ.getReg() == Reg && MOJ.isKill()) - isKill = true; - } - + bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; - Loc++; + ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Regs)) return; @@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB, Merges.push_back(prior(Loc)); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { // Remove kill flags from any unmerged memops that come before insertPos. - if (Regs[i-memOpsBegin].second) - for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j) - if (memOps[j].Position<insertPos) { - MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); - if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill()) - MOJ.setIsKill(false); - } + if (Regs[i-memOpsBegin].second) { + unsigned Reg = Regs[i-memOpsBegin].first; + if (KilledRegs.count(Reg)) { + unsigned j = Killer[Reg]; + memOps[j].MBBI->getOperand(0).setIsKill(false); + } + } MBB.erase(memOps[i].MBBI); memOps[i].Merged = true; } @@ -517,8 +534,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } // Try merging with the previous instruction. 
- if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + if (MBBI != BeginMBBI) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) + --PrevMBBI; if (isAM4) { if (Mode == ARM_AM::ia && isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { @@ -541,8 +561,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } // Try merging with the next instruction. - if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (!DoMerge && MBBI != EndMBBI) { MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; if (isAM4) { if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { @@ -669,8 +692,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); // Try merging with the previous instruction. - if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + if (MBBI != BeginMBBI) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) + --PrevMBBI; if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; AddSub = ARM_AM::sub; @@ -685,8 +711,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } // Try merging with the next instruction. - if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (!DoMerge && MBBI != EndMBBI) { MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; if (!isAM5 && isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; @@ -759,18 +788,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, /// isMemoryOp - Returns true if instruction is a memory operations (that this /// pass is capable of operating on). static bool isMemoryOp(const MachineInstr *MI) { - if (MI->hasOneMemOperand()) { - const MachineMemOperand *MMO = *MI->memoperands_begin(); + // When no memory operands are present, conservatively assume unaligned, + // volatile, unfoldable. + if (!MI->hasOneMemOperand()) + return false; - // Don't touch volatile memory accesses - we may be changing their order. - if (MMO->isVolatile()) - return false; + const MachineMemOperand *MMO = *MI->memoperands_begin(); - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is - // not. - if (MMO->getAlignment() < 4) - return false; - } + // Don't touch volatile memory accesses - we may be changing their order. + if (MMO->isVolatile()) + return false; + + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is + // not. + if (MMO->getAlignment() < 4) + return false; // str <undef> could probably be eliminated entirely, but for now we just want // to avoid making a mess of it. @@ -898,6 +930,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) return false; + MachineBasicBlock::iterator NewBBI = MBBI; bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; bool EvenDeadKill = isLd ? 
@@ -942,6 +975,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } + NewBBI = llvm::prior(MBBI); } else { // Split into two instructions. assert((!isT2 || !OffReg) && @@ -962,14 +996,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { if (OddReg == EvenReg && EvenDeadKill) { - // If the two source operands are the same, the kill marker is probably - // on the first one. e.g. + // If the two source operands are the same, the kill marker is + // probably on the first one. e.g. // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 EvenDeadKill = false; OddDeadKill = true; @@ -978,6 +1013,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, @@ -989,8 +1025,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, ++NumSTRD2STR; } - MBBI = prior(MBBI); MBB.erase(MI); + MBBI = NewBBI; + return true; } return false; } @@ -1023,6 +1060,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (isMemOp) { int Opcode = MBBI->getOpcode(); unsigned Size = getLSMultipleTransferSize(MBBI); + const MachineOperand &MO = MBBI->getOperand(0); + unsigned Reg = MO.getReg(); + bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); @@ -1044,8 +1084,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); + ++NumMemOps; Advance = true; } else { if (Clobber) { @@ -1057,15 +1097,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; } else if (Offset == I->Offset) { @@ -1078,7 +1120,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } } - if (Advance) { + if (MBBI->isDebugValue()) { + ++MBBI; + if (MBBI == E) + // Reach the end of the block, try merging the memory instructions. + TryMerge = true; + } else if (Advance) { ++Position; ++MBBI; if (MBBI == E) @@ -1279,7 +1326,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, // some day. 
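A recurring theme in this file's changes: wherever the pass inspects a neighboring instruction or assigns positions, DBG_VALUE instructions are now stepped over, so that building at -g cannot change which loads and stores get merged. A standalone model of the neighbor scan (Instr and prevRealInstr are illustrative stand-ins for MachineInstr and the PrevMBBI loops above):

#include <cstdio>
#include <vector>

struct Instr {
  bool IsDebugValue;
  int Id;
};
typedef std::vector<Instr>::iterator InstrIter;

// Step backwards from I, skipping debug-value pseudo-instructions, without
// walking past Begin -- the same guard the PrevMBBI loops use.
static InstrIter prevRealInstr(InstrIter I, InstrIter Begin) {
  InstrIter P = I;
  --P;
  while (P != Begin && P->IsDebugValue)
    --P;
  return P;
}

int main() {
  // ldr; DBG_VALUE; DBG_VALUE; str -- the str's previous memop is the ldr.
  std::vector<Instr> Block = {{false, 0}, {true, 1}, {true, 2}, {false, 3}};
  InstrIter P = prevRealInstr(Block.begin() + 3, Block.begin());
  printf("previous real instruction: %d\n", P->Id); // prints 0
  return 0;
}

The MI2LocMap change further down applies the same idea to position numbering: debug values get no slot at all, so their presence cannot perturb the distance checks between memory operations.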
SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { - if (MemOps.count(&*I)) + if (I->isDebugValue() || MemOps.count(&*I)) continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) @@ -1411,7 +1458,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, std::sort(Ops.begin(), Ops.end(), OffsetCompare()); // The loads / stores of the same base are in order. Scan them from first to - // last and check for the followins: + // last and check for the following: // 1. Any def of base. // 2. Any gaps. while (Ops.size() > 1) { @@ -1474,7 +1521,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() && MemOps.count(InsertPos)) + while (InsertPos != MBB->end() + && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) ++InsertPos; // If we are moving a pair of loads / stores, see if it makes sense @@ -1562,7 +1610,9 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { break; } - MI2LocMap[MI] = Loc++; + if (!MI->isDebugValue()) + MI2LocMap[MI] = ++Loc; + if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 0134276..7e57a1c 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; + /// HasITBlocks - True if IT blocks have been inserted. + bool HasITBlocks; + public: ARMFunctionInfo() : isThumb(false), @@ -97,7 +100,8 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -108,7 +112,8 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -229,6 +234,9 @@ public: int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + bool hasITBlocks() const { return HasITBlocks; } + void setHasITBlocks(bool h) { HasITBlocks = h; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 6beca8b..d020f3c 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -153,11 +153,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Pseudo 256-bit registers to represent pairs of Q registers. These should // never be present in the emitted code. -// These are used for NEON load / store instructions, e.g. vld4, vst3. 
-// NOTE: It's possible to define more QQ registers since technical the -// starting D register number doesn't have to be multiple of 4. e.g. -// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register -// stuffs very messy. +// These are used for NEON load / store instructions, e.g., vld4, vst3. +// NOTE: It's possible to define more QQ registers since technically the +// starting D register number doesn't have to be multiple of 4, e.g., +// D1, D2, D3, D4 would be a legal quad, but that would make the subregister +// stuff very messy. let SubRegIndices = [qsub_0, qsub_1] in { let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1), (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1), @@ -183,7 +183,8 @@ let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1), (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3), (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5), - (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in { + (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in +{ def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; } @@ -196,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; } // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; - -def FPSCR : ARMReg<1, "fpscr">; +def CPSR : ARMReg<0, "cpsr">; +def FPSCR : ARMReg<1, "fpscr">; +def ITSTATE : ARMReg<2, "itstate">; // Register classes. // @@ -348,6 +349,73 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { }]; } +// For tail calls, we can't use callee-saved registers, as they are restored +// to the saved value before the tail call, which would clobber a call address. +// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of +// this class and the preceding one(!) This is what we want. +def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // R9 is available. + static const unsigned ARM_GPR_R9_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R9, ARM::R12 }; + // R9 is not available. + static const unsigned ARM_GPR_NOR9_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R12 }; + + // For Thumb1 mode, we don't want to allocate hi regs at all, as we + // don't know how to spill them. If we make our prologue/epilogue code + // smarter at some point, we can go back to using the above allocation + // orders for the Thumb1 instructions that know how to use hi regs. + static const unsigned THUMB_GPR_AO_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + + tcGPRClass::iterator + tcGPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO_TC; + if (Subtarget.isTargetDarwin()) { + if (Subtarget.isR9Reserved()) + return ARM_GPR_NOR9_TC; + else + return ARM_GPR_R9_TC; + } else + // R9 is either callee-saved or reserved; can't use it. 
+ return ARM_GPR_NOR9_TC; + } + + tcGPRClass::iterator + tcGPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + GPRClass::iterator I; + + if (Subtarget.isThumb1Only()) { + I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); + return I; + } + + if (Subtarget.isTargetDarwin()) { + if (Subtarget.isR9Reserved()) + I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); + else + I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)); + } else + // R9 is either callee-saved or reserved; can't use it. + I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); + return I; + } + }]; +} + + // Scalar single precision floating point register class.. def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, @@ -479,4 +547,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; - diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index bbfc0b2..282abca 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -1,10 +1,10 @@ //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM Cortex A8 processors. @@ -32,50 +32,50 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi, 
[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // Integer multiply pipeline // Result written in E5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, - + // Integer load pipeline // // loads have an extra cycle of latency, but are fully pipelined @@ -166,7 +166,7 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0]>]>, - + // Branch // // no delay slots, so the latency of a branch is unimportant @@ -276,14 +276,14 @@ def CortexA8Itineraries : ProcessorItineraries< // // Single-precision FP Load // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Load // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, + InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0], 0>, InstrStage<1, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -292,7 +292,7 @@ def CortexA8Itineraries : ProcessorItineraries< // // FP 
Load Multiple // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, + InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, InstrStage<2, [A8_Pipe0], 0>, InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -301,14 +301,14 @@ def CortexA8Itineraries : ProcessorItineraries< // // Single-precision FP Store // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Store // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, + InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0], 0>, InstrStage<1, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -317,7 +317,7 @@ def CortexA8Itineraries : ProcessorItineraries< // // FP Store Multiple // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, + InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, InstrStage<2, [A8_Pipe0], 0>, InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -329,35 +329,35 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD1 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // VLD2 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, // // VLD3 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, // // VLD4 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, // // VST // FIXME: We don't model this instruction properly - InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, @@ -600,7 +600,7 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>, // // VTBX InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -610,9 +610,9 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>, InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 
InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 75320d9..df2f896 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1,10 +1,10 @@ //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM Cortex A9 processors. @@ -16,7 +16,6 @@ // Reference Manual". // // Functional units -def A9_Issue : FuncUnit; // issue def A9_Pipe0 : FuncUnit; // pipeline 0 def A9_Pipe1 : FuncUnit; // pipeline 1 def A9_LSPipe : FuncUnit; // LS pipe @@ -27,7 +26,121 @@ def A9_DRegsN : FuncUnit; // FP register set, NEON side // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 // def CortexA9Itineraries : ProcessorItineraries< - [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [ + [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [ + // Two fully-pipelined integer ALU pipelines + // FIXME: There are no operand latencies for these instructions at all! + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>, + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>, + 
InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>, + InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>, + InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, + // Integer load pipeline + // FIXME: The timings are some rough approximations + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>]>, + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>, + // VFP and NEON shares the same register file. This means that every VFP // instruction should wait for full completion of the consecutive NEON // instruction and vice-versa. We model this behavior with two artificial FUs: @@ -39,8 +152,8 @@ def CortexA9Itineraries : ProcessorItineraries< // register file writeback!). // Every NEON instruction does the same but with FUs swapped. // - // Since the reserved FU cannot be acquired this models precisly "cross-domain" - // stalls. + // Since the reserved FU cannot be acquired, this models precisely + // "cross-domain" stalls. // VFP // Issue through integer pipeline, and execute in NEON unit. 
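The Required/Reserved pairing described in the comment above can be read as a small resource game: a VFP instruction acquires A9_DRegsVFP and reserves A9_DRegsN until its writeback, a NEON instruction does the mirror image, so an instruction stalls exactly while the other domain still holds its register-file unit. A toy model of that interaction, not LLVM's scheduler (all names are illustrative):

#include <algorithm>

// One artificial register-file unit per domain; blockedUntil is the
// first cycle at which the unit is free again.
struct RegFileFU {
  RegFileFU() : blockedUntil(0) {}
  int blockedUntil;
};

// An op must acquire its own domain's unit (Required) and blocks the
// other domain's unit (Reserved) until its writeback completes.
// Returns the cycle at which the op actually issues.
int issueOp(int now, RegFileFU &mySide, RegFileFU &otherSide,
            int writebackLatency) {
  int start = std::max(now, mySide.blockedUntil);
  otherSide.blockedUntil = start + writebackLatency;
  return start;
}

A VFP op would issue with issueOp(now, DRegsVFP, DRegsN, latency) and a NEON op with the arguments swapped, which is precisely the cross-domain stall the itinerary entries below encode with Required and Reserved stages.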
@@ -48,21 +161,21 @@ def CortexA9Itineraries : ProcessorItineraries< // FP Special Register to Integer Register File Move InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Unary InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Unary InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // @@ -70,124 +183,124 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 4 cycles InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Compare InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 4 cycles InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Single to Double FP Convert InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double to Single FP Convert InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single to Half FP Convert InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Half to Single FP Convert InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Single-Precision FP to Integer Convert InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double-Precision FP to Integer Convert InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Single-Precision FP Convert InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Double-Precision FP Convert InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + 
InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single-precision FP ALU InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-precision FP ALU InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Single-precision FP Multiply InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<6, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, // // Double-precision FP Multiply InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<7, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, // // Single-precision FP MAC InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<9, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, // // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<10, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<16, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, // // Double-precision FP DIV InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<26, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, // // Single-precision FP SQRT InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<18, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<13, [A9_NPipe]>], [17, 1]>, + InstrStage<1, [A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, // // Double-precision FP SQRT InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<33, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<28, [A9_NPipe]>], [32, 1]>, // @@ -195,92 +308,79 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra 1 latency cycle since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Integer to Double-precision Move InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra 1 latency cycle since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, // // Single-precision to Integer Move InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // 
Double-precision to Integer Move InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, // // Single-precision FP Load - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Load - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // FP Load Multiple - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Store - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Store - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // FP Store Multiple - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // NEON // Issue through integer pipeline, and execute in NEON unit. - // FIXME: Neon pipeline and LdSt unit are multiplexed. + // FIXME: Neon pipeline and LdSt unit are multiplexed. // Add some syntactic sugar to model this! 
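One detail worth calling out in the rewritten load/store entries above: the trailing 0 in a stage such as InstrStage<1, [A9_Pipe1], 0> is the time increment to the next stage, so a zero means the following stage (here A9_LSPipe) starts in the same cycle even though both units are occupied. Under that simplified reading (a sketch of the semantics, not LLVM's implementation), stage start cycles compose as:

#include <vector>

// Simplified itinerary stage: hold the listed units for `cycles`, then
// start the next stage `timeInc` cycles after this one started. In the
// .td syntax, timeInc defaults to `cycles` when omitted, and a timeInc
// of 0 overlaps this stage with the next one in the same cycle.
struct Stage {
  int cycles;
  int timeInc;
};

int stageStartCycle(const std::vector<Stage> &stages, unsigned index) {
  int cycle = 0;
  for (unsigned i = 0; i < index; ++i)
    cycle += stages[i].timeInc;
  return cycle;
}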
// VLD1 // FIXME: We don't model this instruction properly InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>, InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // VLD2 @@ -288,9 +388,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // VLD3 @@ -298,9 +397,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, // // VLD4 @@ -308,9 +406,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, // // VST @@ -318,121 +415,120 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-register Integer Unary InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Quad-register Integer Unary InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Double-register Integer Q-Unary InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Quad-register Integer CountQ-Unary InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double-register Integer Binary InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, 
A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Binary InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Double-register Integer Subtract InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Quad-register Integer Subtract InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Double-register Integer Shift InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Quad-register Integer Shift InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Double-register Integer Shift (4 cycle) InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Quad-register Integer Shift (4 cycle) InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-register Integer Binary (4 cycle) InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Quad-register Integer Binary (4 cycle) InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Integer Subtract (4 cycle) InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Subtract (4 cycle) InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // @@ -440,7 +536,7 @@ def CortexA9Itineraries : ProcessorItineraries< 
InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Count @@ -449,35 +545,35 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Absolute Difference and Accumulate InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register Absolute Difference and Accumulate InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Double-register Integer Pair Add Long InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, // // Quad-register Integer Pair Add Long InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, // @@ -485,14 +581,14 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Integer Multiply (.8, .16) InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, // @@ -500,56 +596,56 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, // // Quad-register Integer Multiply (.32) InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, // // Double-register Integer Multiply-Accumulate (.8, .16) InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, // // 
Double-register Integer Multiply-Accumulate (.32) InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, // // Quad-register Integer Multiply-Accumulate (.8, .16) InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, // // Quad-register Integer Multiply-Accumulate (.32) InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, // // Move Immediate InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3]>, // // Double-register Permute Move InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_LSPipe]>], [2, 1]>, // // Quad-register Permute Move @@ -558,42 +654,42 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<4, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1]>, // // Integer to Single-precision Move InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Integer to Double-precision Move InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Single-precision to Integer Move InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Double-precision to Integer Move InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // Integer to Lane Move InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<4, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, 
[A9_NPipe]>], [3, 1, 1]>, // @@ -601,7 +697,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 2]>, // // Quad-register FP Unary @@ -610,7 +706,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2]>, // // Double-register FP Binary @@ -619,7 +715,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, // // Quad-register FP Binary @@ -630,14 +726,14 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Double-register FP Multiple-Accumulate InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register FP Multiple-Accumulate @@ -646,28 +742,28 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // // Double-register Reciprical Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Reciprical Step InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [8, 2, 2]>, // // Double-register Permute InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, // // Quad-register Permute @@ -676,7 +772,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, // // Quad-register Permute (3 cycle issue) @@ -685,7 +781,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VPERMQ3, 
[InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, // @@ -693,57 +789,57 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Quad-register VEXT InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, // // VTB InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, // // VTBX InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index f813022..08b560c 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -16,7 +16,7 @@ // Functional Units def V6_Pipe : FuncUnit; // pipeline -// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". 
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual" // def ARMV6Itineraries : ProcessorItineraries< [V6_Pipe], [ diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b4a9252..09203f9 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -60,8 +60,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : - std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), + std::string("e-p:32:32-f64:32:32-i64:32:32-" + "v128:32:128-v64:32:64-n32") : + std::string("e-p:32:32-f64:64:64-i64:64:64-" + "v128:64:128-v64:64:64-n32")), TLInfo(*this), TSInfo(*this) { } @@ -74,9 +76,11 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : + "i16:16:32-i8:8:32-i1:8:32-" + "v128:32:128-v64:32:64-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), + "i16:16:32-i8:8:32-i1:8:32-" + "v128:64:128-v64:64:64-a:0:32-n32")), TLInfo(*this), TSInfo(*this) { } @@ -98,6 +102,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); + return true; } @@ -115,21 +120,20 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, // proper scheduling. PM.add(createARMExpandPseudoPass()); - return true; -} - -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); } - - if (Subtarget.isThumb2()) { + if (Subtarget.isThumb2()) PM.add(createThumb2ITBlockPass()); + + return true; +} + +bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (Subtarget.isThumb2()) PM.add(createThumb2SizeReductionPass()); - } PM.add(createARMConstantIslandPass()); return true; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bfa89c4..8415d1a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -425,7 +425,7 @@ bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) { const AsmToken &NextTok = Parser.getTok(); if (NextTok.isNot(AsmToken::EndOfStatement)) { if (NextTok.isNot(AsmToken::Comma)) - return Error(NextTok.getLoc(), "',' expected"); + return Error(NextTok.getLoc(), "',' expected"); Parser.Lex(); // Eat comma token. 
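One more note on the ARMTargetMachine.cpp hunk above: the new v128/v64 datalayout entries give vectors a 32-bit ABI alignment under APCS while keeping a wider preferred alignment (each entry reads v<bits>:<abi-align>:<pref-align>, in bits). A hedged sketch of reading those entries back, assuming the TargetData API of this LLVM vintage (later renamed DataLayout):

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // The APCS layout string from the hunk above: 64-bit vectors get
  // 32-bit ABI alignment but 64-bit preferred alignment.
  TargetData TD("e-p:32:32-f64:32:32-i64:32:32-v128:32:128-v64:32:64-n32");
  const VectorType *V2i32 = VectorType::get(Type::getInt32Ty(Ctx), 2);
  unsigned ABIBytes  = TD.getABITypeAlignment(V2i32);  // expect 4
  unsigned PrefBytes = TD.getPrefTypeAlignment(V2i32); // expect 8
  return (ABIBytes == 4 && PrefBytes == 8) ? 0 : 1;
}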
if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, @@ -488,7 +488,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, const AsmToken &Tok = Parser.getTok(); if (ParseShift(ShiftType, ShiftAmount, E)) - return Error(Tok.getLoc(), "shift expected"); + return Error(Tok.getLoc(), "shift expected"); OffsetRegShifted = true; } } @@ -665,7 +665,6 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.push_back(Op.take()); - SMLoc Loc = Parser.getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. @@ -763,15 +762,10 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { if (Tok.isNot(AsmToken::Identifier)) return Error(L, "unexpected token in .syntax directive"); const StringRef &Mode = Tok.getString(); - bool unified_syntax; - if (Mode == "unified" || Mode == "UNIFIED") { + if (Mode == "unified" || Mode == "UNIFIED") Parser.Lex(); - unified_syntax = true; - } - else if (Mode == "divided" || Mode == "DIVIDED") { + else if (Mode == "divided" || Mode == "DIVIDED") Parser.Lex(); - unified_syntax = false; - } else return Error(L, "unrecognized syntax mode in .syntax directive"); @@ -791,15 +785,10 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { if (Tok.isNot(AsmToken::Integer)) return Error(L, "unexpected token in .code directive"); int64_t Val = Parser.getTok().getIntVal(); - bool thumb_mode; - if (Val == 16) { + if (Val == 16) Parser.Lex(); - thumb_mode = true; - } - else if (Val == 32) { + else if (Val == 32) Parser.Lex(); - thumb_mode = false; - } else return Error(L, "invalid operand to .code directive"); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index d95efdb..6a40cf3 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -175,23 +175,8 @@ namespace { raw_ostream &O); void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); - - void printHex8ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); - } - void printHex16ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); - } - void printHex32ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); - } - void printHex64ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); - } + void printNEONModImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, @@ -322,7 +307,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0); unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1); O << '{' - << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) + << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi) << '}'; } else if (Modifier && strcmp(Modifier, "lane") == 0) { unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); @@ -617,8 +602,12 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op, O << "[" << getRegisterName(MO1.getReg()); if (MO2.getImm()) { + unsigned Align = MO2.getImm(); + assert((Align == 8 || Align == 
16 || Align == 32) && + "unexpected NEON load/store alignment"); + Align <<= 3; // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO2.getImm(); + O << ", :" << Align; } O << "]"; } @@ -1039,6 +1028,14 @@ void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, } } +void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned EncodedImm = MI->getOperand(OpNum).getImm(); + unsigned EltBits; + uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + O << "#0x" << utohexstr(Val); +} + bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { @@ -1064,20 +1061,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, printOperand(MI, OpNum, O); return false; case 'Q': - if (TM.getTargetData()->isLittleEndian()) - break; - // Fallthrough case 'R': - if (TM.getTargetData()->isBigEndian()) - break; - // Fallthrough - case 'H': // Write second word of DI / DF reference. - // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNum).isReg() || - OpNum+1 == MI->getNumOperands() || - !MI->getOperand(OpNum+1).isReg()) - return true; - ++OpNum; // Return the high-part. + case 'H': + report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!"); + return true; } } @@ -1384,11 +1371,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { } else if (MO.isGlobal()) { MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); const MCSymbolRefExpr *SymRef1 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_LO16, OutContext); + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_LO16, OutContext); const MCSymbolRefExpr *SymRef2 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_HI16, OutContext); + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_HI16, OutContext); V1 = MCOperand::CreateExpr(SymRef1); V2 = MCOperand::CreateExpr(SymRef2); } else { diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 2b94b76..170819a 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -779,22 +779,10 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, O << '#' << MI->getOperand(OpNum).getImm(); } -void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); -} - -void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); -} - -void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); -} - -void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); +void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned EncodedImm = MI->getOperand(OpNum).getImm(); + unsigned EltBits; + uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + O << "#0x" << utohexstr(Val); } diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index be0b7c1..ddf5047 100644 --- 
a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -104,10 +104,7 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); // FIXME: Implement. diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 29e66e1..0df3466 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen NEONPreAllocPass.cpp Thumb1InstrInfo.cpp Thumb1RegisterInfo.cpp + Thumb2HazardRecognizer.cpp Thumb2ITBlockPass.cpp Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index adb7795..a07ff28 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -34,7 +34,7 @@ /// Uses and Defs by this instr. For the Uses part, the pred:$p operand is /// defined with two components: /// -/// def pred { // Operand PredicateOperand +/// def pred { // Operand PredicateOperand /// ValueType Type = OtherVT; /// string PrintMethod = "printPredicateOperand"; /// string AsmOperandLowerMethod = ?; @@ -54,7 +54,7 @@ /// /// For the Defs part, in the simple case of only cc_out:$s, we have: /// -/// def cc_out { // Operand OptionalDefOperand +/// def cc_out { // Operand OptionalDefOperand /// ValueType Type = OtherVT; /// string PrintMethod = "printSBitModifierOperand"; /// string AsmOperandLowerMethod = ?; @@ -765,7 +765,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, || Opcode == ARM::SMC || Opcode == ARM::SVC) && "Unexpected Opcode"); - assert(NumOps >= 1 && OpInfo[0].RegClass == 0 && "Reg operand expected"); + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected"); int Imm32 = 0; if (Opcode == ARM::SMC) { @@ -1106,7 +1106,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+2].RegClass == 0) && + (OpInfo[OpIdx+2].RegClass < 0) && "Expect 3 reg operands"); // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. @@ -1201,7 +1201,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == 0) && + (OpInfo[OpIdx+1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? 
ARM_AM::add : ARM_AM::sub; @@ -1323,7 +1323,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == 0) && + (OpInfo[OpIdx+1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; @@ -1494,7 +1494,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // If there is still an operand info left which is an immediate operand, add // an additional imm5 LSL/ASR operand. - if (ThreeReg && OpInfo[OpIdx].RegClass == 0 + if (ThreeReg && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 5-bit immediate field Inst{11-7}. unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; @@ -1540,7 +1540,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // If there is still an operand info left which is an immediate operand, add // an additional rotate immediate operand. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 2-bit rotate field Inst{11-10}. unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3; @@ -1725,7 +1725,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, "Tied to operand expected"); MI.addOperand(MI.getOperand(0)); - assert(OpInfo[2].RegClass == 0 && !OpInfo[2].isPredicate() && + assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef() && "Imm operand expected"); MI.addOperand(MCOperand::CreateImm(fbits)); @@ -1984,7 +1984,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Extract/decode the f64/f32 immediate. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // The asm syntax specifies the before-expanded <imm>. // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), @@ -2077,42 +2077,12 @@ static unsigned decodeLaneIndex(uint32_t insn) { // imm3 = Inst{18-16}, imm4 = Inst{3-0} // Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions. static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { + unsigned char op = (insn >> 5) & 1; unsigned char cmode = (insn >> 8) & 0xF; unsigned char Imm8 = ((insn >> 24) & 1) << 7 | ((insn >> 16) & 7) << 4 | (insn & 0xF); - uint64_t Imm64 = 0; - - switch (esize) { - case ESize8: - Imm64 = Imm8; - break; - case ESize16: - Imm64 = Imm8 << 8*(cmode >> 1 & 1); - break; - case ESize32: { - if (cmode == 12) - Imm64 = (Imm8 << 8) | 0xFF; - else if (cmode == 13) - Imm64 = (Imm8 << 16) | 0xFFFF; - else { - // Imm8 to be shifted left by how many bytes... 
- Imm64 = Imm8 << 8*(cmode >> 1 & 3); - } - break; - } - case ESize64: { - for (unsigned i = 0; i < 8; ++i) - if ((Imm8 >> i) & 1) - Imm64 |= (uint64_t)0xFF << 8*i; - break; - } - default: - assert(0 && "Unreachable code!"); - return 0; - } - - return Imm64; + return (op << 12) | (cmode << 8) | Imm8; } // A8.6.339 VMUL, VMULL (by scalar) @@ -2303,7 +2273,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); + OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? @@ -2320,7 +2290,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); RegClass = OpInfo[OpIdx].RegClass; - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, RegClass, Rd, UseDRegPair(Opcode)))); @@ -2329,7 +2299,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } // Handle possible lane index. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); ++OpIdx; @@ -2340,7 +2310,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // possible TIED_TO DPR/QPR's (ignored), then possible lane index. RegClass = OpInfo[0].RegClass; - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, RegClass, Rd, UseDRegPair(Opcode)))); @@ -2355,7 +2325,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); + OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? @@ -2366,7 +2336,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; } - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 && "Tied to operand expected"); MI.addOperand(MCOperand::CreateReg(0)); @@ -2374,7 +2344,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } // Handle possible lane index. 
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); ++OpIdx; @@ -2438,7 +2408,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == 0) && + (OpInfo[1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); // Qd/Dd = Inst{22:15-12} => NEON Rd @@ -2552,7 +2522,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Add the imm operand, if required. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { unsigned imm = 0xFFFFFFFF; @@ -2632,7 +2602,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, decodeNEONRm(insn)))); ++OpIdx; - assert(OpInfo[OpIdx].RegClass == 0 && "Imm operand expected"); + assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); // Add the imm operand. @@ -2762,7 +2732,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Add the imm operand. unsigned Imm = 0; @@ -2869,15 +2839,9 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } -static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - assert(0 && "Unreachable code!"); - return false; -} - // Vector Get Lane (move scalar to ARM core register) Instructions. // VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index -static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; @@ -2887,7 +2851,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && - OpInfo[2].RegClass == 0 && + OpInfo[2].RegClass < 0 && "Expect >= 3 operands with one dst operand"); ElemSize esize = @@ -2911,7 +2875,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Set Lane (move ARM core register to scalar) Instructions. 
// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index -static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; @@ -2923,7 +2887,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpInfo[1].RegClass == ARM::DPRRegClassID && TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && OpInfo[2].RegClass == ARM::GPRRegClassID && - OpInfo[3].RegClass == 0 && + OpInfo[3].RegClass < 0 && "Expect >= 3 operands with one dst operand"); ElemSize esize = @@ -2950,7 +2914,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Duplicate Instructions (from ARM core register to all elements). // VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt -static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -3090,13 +3054,6 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -static bool DisassembleThumbMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - assert(0 && "Unexpected thumb misc. instruction!"); - return false; -} - /// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP. /// We divide the disassembly task into different categories, with each one /// corresponding to a specific instruction encoding format. There could be @@ -3128,12 +3085,10 @@ static const DisassembleFP FuncPtrs[] = { &DisassembleVFPLdStMulFrm, &DisassembleVFPMiscFrm, &DisassembleThumbFrm, - &DisassembleNEONFrm, - &DisassembleNEONGetLnFrm, - &DisassembleNEONSetLnFrm, - &DisassembleNEONDupFrm, &DisassembleMiscFrm, - &DisassembleThumbMiscFrm, + &DisassembleNGetLnFrm, + &DisassembleNSetLnFrm, + &DisassembleNDupFrm, // VLD and VST (including one lane) Instructions. &DisassembleNLdSt, @@ -3233,7 +3188,8 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, // a pair of TargetOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + OpInfo[Idx].RegClass < 0 && + OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) { // If we are inside an IT block, get the IT condition bits maintained via // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). @@ -3265,7 +3221,8 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // a pair of TargetOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + OpInfo[Idx].RegClass < 0 && + OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) { // If we are inside an IT block, get the IT condition bits maintained via // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). 
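Two recurring patterns in the disassembler diff above deserve a note. The many RegClass == 0 to RegClass < 0 rewrites track a change in how operand info flags immediates: a non-register operand now carries a negative register-class value, so testing for zero no longer identifies it. Separately, decodeN1VImm no longer expands the NEON modified immediate itself; it returns the packed op:cmode:imm8 value, and the new printNEONModImmOperand shown earlier expands it with ARM_AM::decodeNEONModImm at print time. The sketch below reconstructs, as a standalone C++ function, the expansion the deleted switch used to perform; expandNEONModImm is an illustrative name rather than the actual ARM_AM helper, and the element size is assumed to have been derived from cmode already.

#include <cassert>
#include <cstdint>

// Expand an 8-bit NEON modified immediate to its full value, mirroring the
// per-element-size cases of the decodeN1VImm switch removed above.
static uint64_t expandNEONModImm(uint8_t Imm8, unsigned EltBits,
                                 unsigned Cmode) {
  switch (EltBits) {
  case 8:   // i8: the immediate is used as-is.
    return Imm8;
  case 16:  // i16: imm8 in the low or the high byte of each halfword.
    return uint64_t(Imm8) << (8 * ((Cmode >> 1) & 1));
  case 32:  // i32: byte-shifted imm8, or the "shift ones" forms.
    if (Cmode == 12)
      return (uint64_t(Imm8) << 8) | 0xFF;
    if (Cmode == 13)
      return (uint64_t(Imm8) << 16) | 0xFFFF;
    return uint64_t(Imm8) << (8 * ((Cmode >> 1) & 3));
  case 64: { // i64: each imm8 bit selects an all-ones byte.
    uint64_t Imm64 = 0;
    for (unsigned i = 0; i != 8; ++i)
      if ((Imm8 >> i) & 1)
        Imm64 |= uint64_t(0xFF) << (8 * i);
    return Imm64;
  }
  }
  assert(0 && "unexpected element size");
  return 0;
}

Keeping the packed 13-bit form in the MCInst means a single immediate operand is enough for the printer to recover both the value and the element width, which is presumably why decodeNEONModImm returns EltBits through an out-parameter.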
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index b1d90df..7d21256 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -137,25 +137,25 @@ static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To, /// Various utilities for checking the target specific flags. /// A unary data processing instruction doesn't have an Rn operand. -static inline bool isUnaryDP(unsigned TSFlags) { +static inline bool isUnaryDP(uint64_t TSFlags) { return (TSFlags & ARMII::UnaryDP); } /// This four-bit field describes the addressing mode used. /// See also ARMBaseInstrInfo.h. -static inline unsigned getAddrMode(unsigned TSFlags) { +static inline unsigned getAddrMode(uint64_t TSFlags) { return (TSFlags & ARMII::AddrModeMask); } /// {IndexModePre, IndexModePost} /// Only valid for load and store ops. /// See also ARMBaseInstrInfo.h. -static inline unsigned getIndexMode(unsigned TSFlags) { +static inline unsigned getIndexMode(uint64_t TSFlags) { return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; } /// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList. -static inline bool isPrePostLdSt(unsigned TSFlags) { +static inline bool isPrePostLdSt(uint64_t TSFlags) { return (TSFlags & ARMII::IndexModeMask) != 0; } diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 4b2e308..4b7a0bf 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -395,7 +395,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRm(insn)))); } else { - assert(OpInfo[OpIdx].RegClass == 0 && + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn) @@ -531,7 +531,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == 0 && + (OpInfo[1].RegClass < 0 && !OpInfo[1].isPredicate() && !OpInfo[1].isOptionalDef()) && "Invalid arguments"); @@ -598,7 +598,7 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, assert(OpIdx < NumOps && "More operands expected"); - if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() && + if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(Imm5 ? 
getT1Imm5(insn) : 0)); @@ -632,7 +632,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass == 0 && + (OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef()) && "Invalid arguments"); @@ -658,7 +658,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == 0 && + (OpInfo[1].RegClass < 0 && !OpInfo[1].isPredicate() && !OpInfo[1].isOptionalDef()) && "Invalid arguments"); @@ -685,7 +685,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass == 0 && + (OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef()) && "Invalid arguments"); @@ -761,7 +761,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, // Predicate operands are handled elsewhere. if (NumOps == 2 && OpInfo[0].isPredicate() && OpInfo[1].isPredicate() && - OpInfo[0].RegClass == 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { + OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { return true; } @@ -808,7 +808,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, } assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass==0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) + (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) && "Expect >=2 operands"); // Add the destination operand. @@ -913,7 +913,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; - assert(NumOps == 3 && OpInfo[0].RegClass == 0 && + assert(NumOps == 3 && OpInfo[0].RegClass < 0 && OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID && "Exactly 3 operands expected"); @@ -939,7 +939,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; - assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected"); + assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected"); unsigned Imm11 = getT1Imm11(insn); @@ -1239,7 +1239,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && OpInfo[2].RegClass == ARM::GPRRegClassID - && OpInfo[3].RegClass == 0 + && OpInfo[3].RegClass < 0 && "Expect >= 4 operands and first 3 as reg operands"); // Add the <Rt> <Rt2> operands. @@ -1322,8 +1322,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps == 4 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == 0 - && OpInfo[3].RegClass == 0 + && OpInfo[2].RegClass < 0 + && OpInfo[3].RegClass < 0 && "Exactly 4 operands expected and first two as reg operands"); // Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -1375,7 +1375,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumOps == OpIdx) return true; - if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { if (Thumb2ShiftOpcode(Opcode)) @@ -1440,7 +1440,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, } // The modified immediate operand should come next. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && + assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1555,7 +1555,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, ++OpIdx; } - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1772,7 +1772,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); int Offset = 0; @@ -1792,7 +1792,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, } ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs. MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4))); @@ -1818,7 +1818,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == 0 && + OpInfo[1].RegClass < 0 && "Expect >= 2 operands, first as reg, and second as imm operand"); // Build the register operand, followed by the (+/-)imm12 immediate. @@ -1930,7 +1930,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, ++OpIdx; } - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1981,7 +1981,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRm(insn)))); ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Add the rotation amount immediate. MI.addOperand(MCOperand::CreateImm(decodeRotate(insn))); diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 0a4400c..bbdd3c7 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -105,8 +105,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { unsigned MOReg = MO.getReg(); Defs[MOReg] = MI; - // Catch subregs as well. - for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R) + // Catch aliases as well. 
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R) Defs[*R] = MI; } } diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index a725898..f67717c 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -407,7 +407,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, "expected a virtual register"); // Extracting from a Q or QQ register. MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isExtractSubreg()) + if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg()) return false; VirtReg = DefMI->getOperand(1).getReg(); if (LastSrcReg && LastSrcReg != VirtReg) @@ -418,7 +418,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, RC != ARM::QQPRRegisterClass && RC != ARM::QQQQPRRegisterClass) return false; - unsigned SubIdx = DefMI->getOperand(2).getImm(); + unsigned SubIdx = DefMI->getOperand(1).getSubReg(); if (LastSubIdx) { if (LastSubIdx != SubIdx-Stride) return false; @@ -434,22 +434,21 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is // currently required for correctness. e.g. - // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6 + // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6 // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6 // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5 // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>, - // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5 + // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5 // respectively. // We need to change how we model uses of REG_SEQUENCE. for (unsigned R = 0; R < NumRegs; ++R) { MachineOperand &MO = MI->getOperand(FirstOpnd + R); unsigned OldReg = MO.getReg(); MachineInstr *DefMI = MRI->getVRegDef(OldReg); - assert(DefMI->isExtractSubreg()); + assert(DefMI->isCopy()); MO.setReg(LastSrcReg); MO.setSubReg(SubIds[R]); - if (R != 0) - MO.setIsKill(false); + MO.setIsKill(false); // Delete the EXTRACT_SUBREG if its result is now dead. if (MRI->use_empty(OldReg)) DefMI->eraseFromParent(); @@ -467,43 +466,9 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { unsigned FirstOpnd, NumRegs, Offset, Stride; if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - if (llvm::ModelWithRegSequence() && - FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) + if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; - - MachineBasicBlock::iterator NextI = llvm::next(MBBI); - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - - // For now, just assign a fixed set of adjacent registers. - // This leaves plenty of room for future improvements. - static const unsigned NEONDRegs[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 - }; - MO.setReg(NEONDRegs[Offset + R * Stride]); - - if (MO.isUse()) { - // Insert a copy from VirtReg. 
- TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg, - ARM::DPRRegisterClass, ARM::DPRRegisterClass, - DebugLoc()); - if (MO.isKill()) { - MachineInstr *CopyMI = prior(MBBI); - CopyMI->findRegisterUseOperand(VirtReg)->setIsKill(); - } - MO.setIsKill(); - } else if (MO.isDef() && !MO.isDead()) { - // Add a copy to VirtReg. - TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(), - ARM::DPRRegisterClass, ARM::DPRRegisterClass, - DebugLoc()); - } - } + llvm_unreachable("expected a REG_SEQUENCE"); } return Modified; diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index fae84d4..af630ac 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -33,64 +33,24 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } - } else if (DestRC == ARM::tGPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); - return true; - } - } - - return false; -} - -bool Thumb1InstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVgpr2gpr: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - return false; - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && - !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. 
- return false; - } - return true; - } - } - - return false; +void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool tDest = ARM::tGPRRegClass.contains(DestReg); + bool tSrc = ARM::tGPRRegClass.contains(SrcReg); + unsigned Opc = ARM::tMOVgpr2gpr; + if (tDest && tSrc) + Opc = ARM::tMOVr; + else if (tSrc) + Opc = ARM::tMOVtgpr2gpr; + else if (tDest) + Opc = ARM::tMOVgpr2tgpr; + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && + "Thumb1 can only copy GPR registers"); } void Thumb1InstrInfo:: @@ -175,10 +135,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, isKill = false; } - if (isKill) { + if (isKill) MBB.addLiveIn(Reg); - MIB.addReg(Reg, RegState::Kill); - } + + MIB.addReg(Reg, getKillRegState(isKill)); } return true; } @@ -221,46 +181,3 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } - -MachineInstr *Thumb1InstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVgpr2gpr: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - break; - NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && - !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. 
- break; - bool isDead = MI->getOperand(0).isDead(); - NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) - .addReg(DstReg, - RegState::Define | getDeadRegState(isDead)) - .addFrameIndex(FI).addImm(0)); - } - break; - } - } - - return NewMI; -} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index c937296..555135a 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -46,12 +46,10 @@ public: const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const; - bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -64,20 +62,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } }; } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 2f635fe..39b70b4 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -68,21 +68,6 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } -const TargetRegisterClass* -Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const { - if (isARMLowRegister(Reg)) - return ARM::tGPRRegisterClass; - switch (Reg) { - default: - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: - return ARM::GPRRegisterClass; - } - - return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); -} - bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); @@ -410,6 +395,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // before that instead and adjust the UseMI. bool done = false; for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + if (II->isDebugValue()) + continue; // If this instruction affects R12, adjust our restore point. for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { const MachineOperand &MO = II->getOperand(i); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 4eca367..9a0308af 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -38,9 +38,6 @@ public: unsigned PredReg = 0) const; /// Code Generation virtual methods... - const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; - bool hasReservedCallFrame(MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, @@ -51,7 +48,8 @@ public: // could not be handled directly in MI. 
int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const; + unsigned MOVOpc, unsigned ADDriOpc, + unsigned SUBriOpc) const; bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp new file mode 100644 index 0000000..172908d --- /dev/null +++ b/lib/Target/ARM/Thumb2HazardRecognizer.cpp @@ -0,0 +1,53 @@ +//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "Thumb2HazardRecognizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/ScheduleDAG.h" +using namespace llvm; + +ScheduleHazardRecognizer::HazardType +Thumb2HazardRecognizer::getHazardType(SUnit *SU) { + if (ITBlockSize) { + MachineInstr *MI = SU->getInstr(); + if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1]) + return Hazard; + } + + return PostRAHazardRecognizer::getHazardType(SU); +} + +void Thumb2HazardRecognizer::Reset() { + ITBlockSize = 0; + PostRAHazardRecognizer::Reset(); +} + +void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) { + MachineInstr *MI = SU->getInstr(); + unsigned Opcode = MI->getOpcode(); + if (ITBlockSize) { + --ITBlockSize; + } else if (Opcode == ARM::t2IT) { + unsigned Mask = MI->getOperand(1).getImm(); + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + ITBlockSize = 4 - NumTZ; + MachineBasicBlock::iterator I = MI; + for (unsigned i = 0; i < ITBlockSize; ++i) { + // Advance to the next instruction, skipping any dbg_value instructions. + do { + ++I; + } while (I->isDebugValue()); + ITBlockMIs[ITBlockSize-1-i] = &*I; + } + } + + PostRAHazardRecognizer::EmitInstruction(SU); +} diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h new file mode 100644 index 0000000..4726658 --- /dev/null +++ b/lib/Target/ARM/Thumb2HazardRecognizer.h @@ -0,0 +1,40 @@ +//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling Thumb2 functions on +// ARM processors. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB2HAZARDRECOGNIZER_H +#define THUMB2HAZARDRECOGNIZER_H + +#include "llvm/CodeGen/PostRAHazardRecognizer.h" + +namespace llvm { + +class MachineInstr; + +class Thumb2HazardRecognizer : public PostRAHazardRecognizer { + unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. 
+ MachineInstr *ITBlockMIs[4]; + +public: + Thumb2HazardRecognizer(const InstrItineraryData &ItinData) : + PostRAHazardRecognizer(ItinData) {} + + virtual HazardType getHazardType(SUnit *SU); + virtual void Reset(); + virtual void EmitInstruction(SUnit *SU); +}; + + +} // end namespace llvm + +#endif // THUMB2HAZARDRECOGNIZER_H diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index f36d4ef..cd15bbe 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -14,17 +14,23 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumITs, "Number of IT blocks inserted"); +STATISTIC(NumITs, "Number of IT blocks inserted"); +STATISTIC(NumMovedInsts, "Number of predicated instructions moved"); namespace { - struct Thumb2ITBlockPass : public MachineFunctionPass { + class Thumb2ITBlockPass : public MachineFunctionPass { + bool PreRegAlloc; + + public: static char ID; Thumb2ITBlockPass() : MachineFunctionPass(&ID) {} const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; ARMFunctionInfo *AFI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -34,61 +40,167 @@ namespace { } private: - bool InsertITBlocks(MachineBasicBlock &MBB); + bool MoveCopyOutOfITBlock(MachineInstr *MI, + ARMCC::CondCodes CC, ARMCC::CondCodes OCC, + SmallSet<unsigned, 4> &Defs, + SmallSet<unsigned, 4> &Uses); + bool InsertITInstructions(MachineBasicBlock &MBB); }; char Thumb2ITBlockPass::ID = 0; } -static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){ - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) - return ARMCC::AL; - return llvm::getInstrPredicate(MI, PredReg); +/// TrackDefUses - Track what registers are being defined and used by +/// instructions in the IT block. This also tracks "dependencies", i.e. uses +/// in the IT block that are defined before the IT instruction. +static void TrackDefUses(MachineInstr *MI, + SmallSet<unsigned, 4> &Defs, + SmallSet<unsigned, 4> &Uses, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 4> LocalDefs; + SmallVector<unsigned, 4> LocalUses; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP) + continue; + if (MO.isUse()) + LocalUses.push_back(Reg); + else + LocalDefs.push_back(Reg); + } + + for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { + unsigned Reg = LocalUses[i]; + Uses.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Uses.insert(*Subreg); + } + + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + Defs.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Defs.insert(*Subreg); + if (Reg == ARM::CPSR) + continue; + } +} + +bool +Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, + ARMCC::CondCodes CC, ARMCC::CondCodes OCC, + SmallSet<unsigned, 4> &Defs, + SmallSet<unsigned, 4> &Uses) { + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { + assert(SrcSubIdx == 0 && DstSubIdx == 0 && + "Sub-register indices still around?"); + // llvm models selects as two-address instructions.
That means a copy + // is inserted before a t2MOVccr, etc. If the copy is scheduled in + // between selects we would end up creating multiple IT blocks. + + // First check if it's safe to move it. + if (Uses.count(DstReg) || Defs.count(SrcReg)) + return false; + + // Then peek at the next instruction to see if it's predicated on CC or OCC. + // If not, then there is nothing to be gained by moving the copy. + MachineBasicBlock::iterator I = MI; ++I; + MachineBasicBlock::iterator E = MI->getParent()->end(); + while (I != E && I->isDebugValue()) + ++I; + if (I != E) { + unsigned NPredReg = 0; + ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); + if (NCC == CC || NCC == OCC) + return true; + } + } + return false; } -bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { +bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { bool Modified = false; + SmallSet<unsigned, 4> Defs; + SmallSet<unsigned, 4> Uses; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; - ARMCC::CondCodes CC = getPredicate(MI, PredReg); - + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; } + Defs.clear(); + Uses.clear(); + TrackDefUses(MI, Defs, Uses, TRI); + // Insert an IT instruction. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)) .addImm(CC); + + // Add implicit use of ITSTATE to IT block instructions. + MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/, + true/*isImp*/, false/*isKill*/)); + + MachineInstr *LastITMI = MI; + MachineBasicBlock::iterator InsertPos = MIB; ++MBBI; - // Finalize IT mask. + // Form IT block. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. - while (MBBI != E && Pos && - (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) { + for (; MBBI != E && Pos && + (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) { + if (MBBI->isDebugValue()) + continue; + + MachineInstr *NMI = &*MBBI; MI = NMI; - DebugLoc ndl = NMI->getDebugLoc(); + unsigned NPredReg = 0; - ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NCC == CC || NCC == OCC) + ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg); + if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; - else + // Add implicit use of ITSTATE. + NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/, + true/*isImp*/, false/*isKill*/)); + LastITMI = NMI; + } else { + if (NCC == ARMCC::AL && + MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) { + --MBBI; + MBB.remove(NMI); + MBB.insert(InsertPos, NMI); + ++NumMovedInsts; + continue; + } break; + } + TrackDefUses(NMI, Defs, Uses, TRI); --Pos; - ++MBBI; } + + // Finalize IT mask. Mask |= (1 << Pos); // Tag along (firstcond[0] << 4) with the mask. Mask |= (CC & 1) << 4; MIB.addImm(Mask); + + // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); + Modified = true; ++NumITs; } @@ -100,17 +212,21 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); + TRI = TM.getRegisterInfo(); if (!AFI->isThumbFunction()) return false; bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) { MachineBasicBlock &MBB = *MFI; - Modified |= InsertITBlocks(MBB); + ++MFI; + Modified |= InsertITInstructions(MBB); } + if (Modified) + AFI->setHasITBlocks(true); + return Modified; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 531d5e9..ee51727 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -17,15 +17,27 @@ #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" +#include "Thumb2HazardRecognizer.h" +#include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" -#include "Thumb2InstrInfo.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<unsigned> +IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden, + cl::desc("Thumb2 if-conversion limit (default 3)"), + cl::init(3)); + +static cl::opt<unsigned> +IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden, + cl::desc("Thumb2 diamond if-conversion limit (default 3)"), + cl::init(3)); + Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } @@ -35,33 +47,99 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool -Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } - } else if (DestRC == ARM::tGPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); - return true; +void +Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); + if (!AFI->hasITBlocks()) { + TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + return; + } + + // If the first instruction of Tail is predicated, we may have to update + // the IT instruction. + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg); + MachineBasicBlock::iterator MBBI = Tail; + if (CC != ARMCC::AL) + // Expecting at least the t2IT instruction before it. + --MBBI; + + // Actually replace the tail. 
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + + // Fix up IT. + if (CC != ARMCC::AL) { + MachineBasicBlock::iterator E = MBB->begin(); + unsigned Count = 4; // At most 4 instructions in an IT block. + while (Count && MBBI != E) { + if (MBBI->isDebugValue()) { + --MBBI; + continue; + } + if (MBBI->getOpcode() == ARM::t2IT) { + unsigned Mask = MBBI->getOperand(1).getImm(); + if (Count == 4) + MBBI->eraseFromParent(); + else { + unsigned MaskOn = 1 << Count; + unsigned MaskOff = ~(MaskOn - 1); + MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn); + } + return; + } + --MBBI; + --Count; } + + // Control flow can reach here if branch folding is run before the IT block + // formation pass. } +} + +bool +Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const { + unsigned PredReg = 0; + return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; +} +bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const { + return NumInstrs && NumInstrs <= IfCvtLimit; +} + +bool Thumb2InstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, + MachineBasicBlock &FMBB, unsigned NumF) const { + // FIXME: Catch optimizations such as: + // r0 = movne + // r0 = moveq + return NumT && NumF && + NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit); +} + +void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { // Handle SPR, DPR, and QPR copies. - return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL); + if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) + return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); + + bool tDest = ARM::tGPRRegClass.contains(DestReg); + bool tSrc = ARM::tGPRRegClass.contains(SrcReg); + unsigned Opc = ARM::tMOVgpr2gpr; + if (tDest && tSrc) + Opc = ARM::tMOVr; + else if (tSrc) + Opc = ARM::tMOVtgpr2gpr; + else if (tDest) + Opc = ARM::tMOVgpr2tgpr; + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } void Thumb2InstrInfo:: @@ -69,7 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -94,7 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -113,6 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } +ScheduleHazardRecognizer *Thumb2InstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { + return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, @@ -131,14 +216,14 @@
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
 // Use a movw to materialize the 16-bit constant.
 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
 .addImm(NumBytes)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
 Fits = true;
 } else if ((NumBytes & 0xffff) == 0) {
 // Use a movt to materialize the 32-bit constant.
 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
 .addReg(DestReg)
 .addImm(NumBytes >> 16)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
 Fits = true;
 }
@@ -502,3 +587,54 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 Offset = (isSub) ? -Offset : Offset;
 return Offset == 0;
 }
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
+ SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+ // and UseMI which can form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 2948770..3a9f8b1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -20,7 +20,8 @@
 #include "Thumb2RegisterInfo.h"
 namespace llvm {
- class ARMSubtarget;
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
 class Thumb2InstrInfo : public ARMBaseInstrInfo {
 Thumb2RegisterInfo RI;
@@ -31,12 +32,21 @@ public:
 // if there is not such an opcode. 
 unsigned getUnindexedOpcode(unsigned Opc) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
+ MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
 void storeRegToStackSlot(MachineBasicBlock &MBB,
 MachineBasicBlock::iterator MBBI,
@@ -50,12 +60,27 @@ public:
 const TargetRegisterClass *RC,
 const TargetRegisterInfo *TRI) const;
+
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+ /// two-address instruction inserted by the two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
 /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
 /// such, whenever a client has an instance of instruction info, it should
 /// always be able to get register info as well (through this method).
 ///
 const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
 };
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
 }
 #endif // THUMB2INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8fe2e42..ba392f3 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -451,11 +451,18 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
 return false;
- const TargetInstrDesc &TID = MI->getDesc();
 unsigned Reg0 = MI->getOperand(0).getReg();
 unsigned Reg1 = MI->getOperand(1).getReg();
- if (Reg0 != Reg1)
- return false;
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
 if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
 return false;
 if (Entry.Imm2Limit) {
@@ -484,6 +491,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 bool HasCC = false;
 bool CCDead = false;
+ const TargetInstrDesc &TID = MI->getDesc();
 if (TID.hasOptionalDef()) {
 unsigned NumOps = TID.getNumOperands();
 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
@@ -689,7 +697,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 goto ProcessNext;
 }
- // Try to transform ro a 16-bit non-two-address instruction.
+ // Try to transform to a 16-bit non-two-address instruction. 
if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 1d85f12..ea78bf3 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -224,6 +224,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -251,7 +252,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -425,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, } } else { //more args // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false); + int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -444,7 +445,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); - int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); + int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true); if (i == 0) FuncInfo->setVarArgsBase(FI); SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, @@ -453,7 +454,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); - FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); + FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true); SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, false, false, 0)); @@ -470,6 +471,7 @@ SDValue AlphaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26, @@ -483,7 +485,7 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, break; //return SDValue(); // ret void is legal case 1: { - EVT ArgVT = Outs[0].Val.getValueType(); + EVT ArgVT = Outs[0].VT; unsigned ArgReg; if (ArgVT.isInteger()) ArgReg = Alpha::R0; @@ -492,13 +494,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, ArgReg = Alpha::F0; } Copy = DAG.getCopyToReg(Copy, dl, ArgReg, - Outs[0].Val, Copy.getValue(1)); + OutVals[0], Copy.getValue(1)); if (DAG.getMachineFunction().getRegInfo().liveout_empty()) DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg); break; } case 2: { - EVT ArgVT = Outs[0].Val.getValueType(); + EVT ArgVT = Outs[0].VT; unsigned ArgReg1, ArgReg2; if (ArgVT.isInteger()) { ArgReg1 = Alpha::R0; @@ -509,13 +511,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, ArgReg2 = Alpha::F1; } Copy = DAG.getCopyToReg(Copy, dl, 
ArgReg1, - Outs[0].Val, Copy.getValue(1)); + OutVals[0], Copy.getValue(1)); if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1) == DAG.getMachineFunction().getRegInfo().liveout_end()) DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1); Copy = DAG.getCopyToReg(Copy, dl, ArgReg2, - Outs[1].Val, Copy.getValue(1)); + OutVals[1], Copy.getValue(1)); if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2) == DAG.getMachineFunction().getRegInfo().liveout_end()) @@ -539,7 +541,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, false, false, 0); SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); - SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), + SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1), Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) @@ -643,10 +645,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, case ISD::GlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset()); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, + GSDN->getOffset()); // FIXME there isn't really any debug info here - // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) { + // if (!GV->hasWeakLinkage() && !GV->isDeclaration() + // && !GV->hasLinkOnceLinkage()) { if (GV->hasLocalLinkage()) { SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA, DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); @@ -702,7 +706,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SDValue Result; if (Op.getValueType() == MVT::i32) - Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, + Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr, NULL, 0, MVT::i32, false, false, 0); else Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, @@ -722,7 +726,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); - Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, + Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result, NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); @@ -863,7 +867,10 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - sinkMBB->transferSuccessors(thisMBB); + sinkMBB->splice(sinkMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(MI)), + thisMBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB); F->insert(It, llscMBB); F->insert(It, sinkMBB); @@ -912,7 +919,7 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, thisMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(sinkMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. 
return sinkMBB; } diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index 7ee823a..46e0c7d 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -121,6 +121,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -129,6 +130,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td index d984556..6f4ebf2 100644 --- a/lib/Target/Alpha/AlphaInstrFormats.td +++ b/lib/Target/Alpha/AlphaInstrFormats.td @@ -182,7 +182,7 @@ class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Inst bits<5> Rb; bits<7> Function = fun; -// let isTwoAddress = 1; +// let Constraints = "$RFALSE = $RDEST"; let Inst{25-21} = Ra; let Inst{20-16} = Rb; let Inst{15-13} = 0; @@ -223,7 +223,7 @@ class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Ins bits<8> LIT; bits<7> Function = fun; -// let isTwoAddress = 1; +// let Constraints = "$RFALSE = $RDEST"; let Inst{25-21} = Ra; let Inst{20-13} = LIT; let Inst{12} = 1; diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 3aba363..ad625a2 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -110,9 +110,8 @@ static bool isAlphaIntCondCode(unsigned Opcode) { unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && "Alpha branch conditions have two components!"); @@ -120,58 +119,47 @@ unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, // One-way branch. if (FBB == 0) { if (Cond.empty()) // Unconditional branch - BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB); + BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB); else // Conditional branch if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); else - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); return 1; } // Two-way Conditional Branch. 
if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); else - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB); + BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB); return 2; } -bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n"; - if (DestRC != SrcRC) { - // Not yet supported! - return false; - } - - if (DestRC == Alpha::GPRCRegisterClass) { +void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg) .addReg(SrcReg) - .addReg(SrcReg); - } else if (DestRC == Alpha::F4RCRegisterClass) { + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg) .addReg(SrcReg) - .addReg(SrcReg); - } else if (DestRC == Alpha::F8RCRegisterClass) { + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg) .addReg(SrcReg) - .addReg(SrcReg); + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - // Attempt to copy register that is not GPR or FPR - return false; + llvm_unreachable("Attempt to copy register that is not GPR or FPR"); } - - return true; } void @@ -227,51 +215,6 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Unhandled register class"); } -MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { - if (Ops.size() != 1) return NULL; - - // Make sure this is a reg-reg copy. - unsigned Opc = MI->getOpcode(); - - MachineInstr *NewMI = NULL; - switch(Opc) { - default: - break; - case Alpha::BISr: - case Alpha::CPYSS: - case Alpha::CPYST: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - if (Ops[0] == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - Opc = (Opc == Alpha::BISr) ? Alpha::STQ : - ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addFrameIndex(FrameIndex) - .addReg(Alpha::F31); - } else { // load -> move - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - Opc = (Opc == Alpha::BISr) ? Alpha::LDQ : - ((Opc == Alpha::CPYSS) ? 
Alpha::LDS : Alpha::LDT); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addFrameIndex(FrameIndex) - .addReg(Alpha::F31); - } - } - break; - } - return NewMI; -} - static unsigned AlphaRevCondCode(unsigned Opcode) { switch (Opcode) { case Alpha::BEQ: return Alpha::BNE; @@ -428,11 +371,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global base register!"); - Ok = Ok; // Silence warning when assertions are turned off. + BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalBaseReg).addReg(Alpha::R29); RegInfo.addLiveIn(Alpha::R29); AlphaFI->setGlobalBaseReg(GlobalBaseReg); @@ -456,11 +396,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global return address register!"); - Ok = Ok; // Silence warning when assertions are turned off. + BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalRetAddr).addReg(Alpha::R26); RegInfo.addLiveIn(Alpha::R26); AlphaFI->setGlobalRetAddr(GlobalRetAddr); diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index 7d7365b..e20e832 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -42,14 +42,13 @@ public: int &FrameIndex) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -62,18 +61,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index a47a29b..92de78a 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -680,18 +680,32 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", } //conditional 
moves, floats -let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), - isTwoAddress = 1 in { -def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero -def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero -def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero -def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero -def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero -def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero +let OutOperandList = (outs F4RC:$RDEST), + InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), + Constraints = "$RTRUE = $RDEST" in { +def FCMOVEQS : FPForm<0x17, 0x02A, + "fcmoveq $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if = zero +def FCMOVGES : FPForm<0x17, 0x02D, + "fcmovge $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if >= zero +def FCMOVGTS : FPForm<0x17, 0x02F, + "fcmovgt $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if > zero +def FCMOVLES : FPForm<0x17, 0x02E, + "fcmovle $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if <= zero +def FCMOVLTS : FPForm<0x17, 0x02C, + "fcmovlt $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; // FCMOVE if < zero +def FCMOVNES : FPForm<0x17, 0x02B, + "fcmovne $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if != zero } //conditional moves, doubles -let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), - isTwoAddress = 1 in { +let OutOperandList = (outs F8RC:$RDEST), + InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), + Constraints = "$RTRUE = $RDEST" in { def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>; diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index c083d8c..dc9d935 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -74,20 +74,6 @@ const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) return CalleeSavedRegs; } -const TargetRegisterClass* const* -AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0 - }; - return CalleeSavedRegClasses; -} - BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(Alpha::R15); diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index 720367a..f9fd87a 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -30,9 +30,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index b4da96c..80ee107 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -132,8 +132,8 @@ static void UpdateNodeOperand(SelectionDAG &DAG, SDValue Val) { SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end()); ops[Num] = Val; - SDValue New = DAG.UpdateNodeOperands(SDValue(N, 0), ops.data(), ops.size()); - DAG.ReplaceAllUsesWith(N, New.getNode()); + SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size()); + DAG.ReplaceAllUsesWith(N, New); } // After instruction selection, insert COPY_TO_REGCLASS nodes to help in diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index adf2118..6e828e1 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -143,7 +143,7 @@ SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, DebugLoc DL = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Op = DAG.getTargetGlobalAddress(GV, MVT::i32); + Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); } @@ -205,8 +205,7 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, } else { assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); unsigned ObjSize = VA.getLocVT().getStoreSize(); - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, false, false, 0)); @@ -220,6 +219,7 @@ SDValue BlackfinTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. @@ -245,7 +245,7 @@ BlackfinTargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Opi = Outs[i].Val; + SDValue Opi = OutVals[i]; // Expand to i32 if necessary switch (VA.getLocInfo()) { @@ -278,6 +278,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -301,7 +302,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -357,7 +358,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 
// Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index a784248..6bebcc3 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -63,6 +63,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -71,6 +72,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index 73924b7..a74d42d 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -104,10 +104,8 @@ unsigned BlackfinInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc DL; - + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. 
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -124,69 +122,73 @@ InsertBranch(MachineBasicBlock &MBB, llvm_unreachable("Implement conditional branches!"); } -static bool inClass(const TargetRegisterClass &Test, - unsigned Reg, - const TargetRegisterClass *RC) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return Test.contains(Reg); - else - return &Test==RC || Test.hasSubClass(RC); -} - -bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (inClass(BF::ALLRegClass, DestReg, DestRC) && - inClass(BF::ALLRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg); - return true; +void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (BF::ALLRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(BF::MOVE), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - if (inClass(BF::D16RegClass, DestReg, DestRC) && - inClass(BF::D16RegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0); - return true; + if (BF::D16RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + return; } - if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) && - inClass(BF::DRegClass, DestReg, DestRC)) { - if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg).addReg(SrcReg); + if (BF::DRegClass.contains(DestReg)) { + if (SrcReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0); - } else { - BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg).addReg(SrcReg); + return; + } + if (SrcReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return true; } - if (inClass(BF::AnyCCRegClass, DestReg, DestRC) && - inClass(BF::DRegClass, SrcReg, SrcRC)) { - if (inClass(BF::NotCCRegClass, DestReg, DestRC)) - BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg).addReg(SrcReg); - else - BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg).addReg(SrcReg); - return true; + if (BF::DRegClass.contains(SrcReg)) { + if (DestReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0); + return; + } + if (DestReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } } - if (inClass(BF::NotCCRegClass, DestReg, DestRC) && - inClass(BF::JustCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg); - return true; + + if (DestReg == BF::NCC && SrcReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - if (inClass(BF::JustCCRegClass, DestReg, DestRC) && - inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg); - return true; + if (DestReg == BF::CC && SrcReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg) + .addReg(SrcReg, 
getKillRegState(KillSrc)); + return; } - llvm_unreachable((std::string("Bad regclasses for reg-to-reg copy: ")+ - SrcRC->getName() + " -> " + DestRC->getName()).c_str()); - return false; + llvm_unreachable("Bad reg-to-reg copy"); +} + +static bool inClass(const TargetRegisterClass &Test, + unsigned Reg, + const TargetRegisterClass *RC) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Test.contains(Reg); + else + return &Test==RC || Test.hasSubClass(RC); } void diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index c1dcd58..6c35917 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -44,14 +44,13 @@ namespace llvm { InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 5cf350a..8034a7f 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -488,7 +488,7 @@ def MOVE: F1<(outs ALL:$dst), (ins ALL:$src), "$dst = $src;", []>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc), "if $cc $dst = $src2;", [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>; @@ -645,7 +645,7 @@ def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2), // Table C-15. 
Bit Operations Instructions //===----------------------------------------------------------------------===// -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2), "bitclr($dst, $src2);", [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>; @@ -691,7 +691,7 @@ multiclass SHIFT32<SDNode opnode, string ops> { } let Defs = [AZ, AN, V, VS], - isTwoAddress = 1 in { + Constraints = "$src = $dst" in { defm SRA : SHIFT32<sra, ">>>">; defm SRL : SHIFT32<srl, ">>">; defm SLL : SHIFT32<shl, "<<">; @@ -748,7 +748,7 @@ def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2), "$dst = $src1 + $src2;", [(set D16:$dst, (add D16:$src1, D16:$src2))]>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2), "$dst += $src2;", [(set D:$dst, (add D:$src1, imm7:$src2))]>; @@ -775,7 +775,7 @@ def NEG: F1<(outs D:$dst), (ins D:$src), def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2), "$dst = $src1 + $src2;", []>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2), "$dst += $src2;", []>; @@ -802,7 +802,7 @@ def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2), } -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2), "$dst *= $src2;", [(set D:$dst, (mul D:$src1, D:$src2))]>; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 5153ace..06e95de 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -48,17 +48,6 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -const TargetRegisterClass* const *BlackfinRegisterInfo:: -getCalleeSavedRegClasses(const MachineFunction *MF) const { - using namespace BF; - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &PRegClass, - &DRegClass, &DRegClass, &DRegClass, &DRegClass, - &PRegClass, &PRegClass, &PRegClass, - 0 }; - return CalleeSavedRegClasses; -} - BitVector BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { using namespace BF; @@ -86,25 +75,6 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass* -BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { - assert(isPhysicalRegister(reg) && "reg must be a physical register"); - - // Pick the smallest register class of the right type that contains - // this physreg. - const TargetRegisterClass* BestRC = 0; - for (regclass_iterator I = regclass_begin(), E = regclass_end(); - I != E; ++I) { - const TargetRegisterClass* RC = *I; - if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || RC->getNumRegs() < BestRC->getNumRegs())) - BestRC = RC; - } - - assert(BestRC && "Couldn't find the register class"); - return BestRC; -} - // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 03c5450..ead0b4a 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -33,9 +33,6 @@ namespace llvm { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; // getSubReg implemented by tablegen @@ -44,9 +41,6 @@ namespace llvm { return &BF::PRegClass; } - const TargetRegisterClass *getPhysicalRegisterRegClass(unsigned reg, - EVT VT) const; - bool hasFP(const MachineFunction &MF) const; // bool hasReservedCallFrame(MachineFunction &MF) const; diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 55b8aaa..e8d8474 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -264,7 +264,7 @@ namespace { // static const AllocaInst *isDirectAlloca(const Value *V) { const AllocaInst *AI = dyn_cast<AllocaInst>(V); - if (!AI) return false; + if (!AI) return 0; if (AI->isArrayAllocation()) return 0; // FIXME: we can also inline fixed size array allocas! if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock()) @@ -2889,7 +2889,7 @@ void CWriter::visitCallInst(CallInst &I) { bool hasByVal = I.hasByValArgument(); bool isStructRet = I.hasStructRetAttr(); if (isStructRet) { - writeOperandDeref(I.getOperand(1)); + writeOperandDeref(I.getArgOperand(0)); Out << " = "; } @@ -2944,8 +2944,8 @@ void CWriter::visitCallInst(CallInst &I) { } unsigned NumDeclaredParams = FTy->getNumParams(); - - CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end(); + CallSite CS(&I); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); unsigned ArgNo = 0; if (isStructRet) { // Skip struct return argument. ++AI; @@ -2999,7 +2999,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << "0; "; Out << "va_start(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; // Output the last argument to the enclosing function. 
if (I.getParent()->getParent()->arg_empty()) @@ -3009,9 +3009,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << ')'; return true; case Intrinsic::vaend: - if (!isa<ConstantPointerNull>(I.getOperand(1))) { + if (!isa<ConstantPointerNull>(I.getArgOperand(0))) { Out << "0; va_end(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; } else { Out << "va_end(*(va_list*)0)"; @@ -3020,47 +3020,47 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, case Intrinsic::vacopy: Out << "0; "; Out << "va_copy(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", *(va_list*)"; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::returnaddress: Out << "__builtin_return_address("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::frameaddress: Out << "__builtin_frame_address("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::powi: Out << "__builtin_powi("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::setjmp: Out << "setjmp(*(jmp_buf*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::longjmp: Out << "longjmp(*(jmp_buf*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::prefetch: Out << "LLVM_PREFETCH((const void *)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ", "; - writeOperand(I.getOperand(3)); + writeOperand(I.getArgOperand(2)); Out << ")"; return true; case Intrinsic::stacksave: @@ -3077,7 +3077,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, printType(Out, I.getType()); Out << ')'; // Multiple GCC builtins multiplex onto this intrinsic. 
- switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) { + switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) { default: llvm_unreachable("Invalid llvm.x86.sse.cmp!"); case 0: Out << "__builtin_ia32_cmpeq"; break; case 1: Out << "__builtin_ia32_cmplt"; break; @@ -3098,9 +3098,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << 'd'; Out << "("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ")"; return true; case Intrinsic::ppc_altivec_lvsl: @@ -3108,7 +3108,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, printType(Out, I.getType()); Out << ')'; Out << "__builtin_altivec_lvsl(0, (void*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ")"; return true; } @@ -3221,7 +3221,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { DestVal = ResultVals[ValueCount].first; DestValNo = ResultVals[ValueCount].second; } else - DestVal = CI.getOperand(ValueCount-ResultVals.size()+1); + DestVal = CI.getArgOperand(ValueCount-ResultVals.size()); if (I->isEarlyClobber) C = "&"+C; @@ -3255,7 +3255,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { } assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); - Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1); + Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); Out << "\"" << C << "\"("; if (!I->isIndirect) diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index 10dc837..ec2f663 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -34,76 +34,19 @@ def RetCC_SPU : CallingConv<[ //===----------------------------------------------------------------------===// // CellSPU Argument Calling Conventions -// (note: this isn't used, but presumably should be at some point when other -// targets do.) 
//===----------------------------------------------------------------------===// -/* -def CC_SPU : CallingConv<[ - CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - +def CCC_SPU : CallingConv<[ + CCIfType<[i8, i16, i32, i64, i128, f32, f64, + v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, 
R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, @@ -112,4 +55,3 @@ def CC_SPU : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>> ]>; -*/ diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h index e8ca333..f511acd 100644 --- a/lib/Target/CellSPU/SPUFrameInfo.h +++ b/lib/Target/CellSPU/SPUFrameInfo.h @@ -53,10 +53,6 @@ namespace llvm { static int minStackSize() { return (2 * stackSlotSize()); } - //! Frame size required to spill all registers plus frame info - static int fullSpillSize() { - return (SPURegisterInfo::getNumArgRegs() * stackSlotSize()); - } //! Convert frame index to stack offset static int FItoStackOffset(int frame_index) { return frame_index * stackSlotSize(); diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9afdb2b..9b8c2dd 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -275,7 +275,6 @@ namespace { SDNode *emitBuildVector(SDNode *bvNode) { EVT vecVT = bvNode->getValueType(0); - EVT eltVT = vecVT.getVectorElementType(); DebugLoc dl = bvNode->getDebugLoc(); // Check to see if this vector can be represented as a CellSPU immediate @@ -606,18 +605,14 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, Base = CurDAG->getTargetConstant(0, N.getValueType()); Index = N; return true; - } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) { + } else if (Opc == ISD::Register + ||Opc == ISD::CopyFromReg + ||Opc == ISD::UNDEF) { unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { // Direct load/store without getelementptr - SDValue Addr, Offs; - - // Get the register from CopyFromReg - if (Opc == ISD::CopyFromReg) - Addr = N.getOperand(1); - else - Addr = N; // Register + SDValue Offs; Offs = ((OpOpc == ISD::STORE) ? 
Op->getOperand(3) : Op->getOperand(2)); @@ -626,7 +621,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); Base = Offs; - Index = Addr; + Index = N; return true; } } else { diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 081e8d0..ece19b9 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -953,7 +953,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); + SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there is no actual debug info here @@ -1013,22 +1014,26 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, MachineRegisterInfo &RegInfo = MF.getRegInfo(); SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); - unsigned ArgOffset = SPUFrameInfo::minStackSize(); unsigned ArgRegIdx = 0; unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); + // Add DAG nodes to load the arguments or copy them out of registers. for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; SDValue ArgVal; + CCValAssign &VA = ArgLocs[ArgNo]; - if (ArgRegIdx < NumArgRegs) { + if (VA.isRegLoc()) { const TargetRegisterClass *ArgRegClass; switch (ObjectVT.getSimpleVT().SimpleTy) { @@ -1067,14 +1072,14 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, } unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); + RegInfo.addLiveIn(VA.getLocReg(), VReg); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++ArgRegIdx; } else { // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; @@ -1087,16 +1092,31 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // vararg handling: if (isVarArg) { - // unsigned int ptr_size = PtrVT.getSizeInBits() / 8; + // FIXME: we should be able to query the argument registers from + // tablegen generated code. 
+ static const unsigned ArgRegs[] = { + SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, + SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, + SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, + SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, + SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, + SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, + SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, + SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, + SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, + SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, + SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 + }; + // size of ArgRegs array + unsigned NumArgRegs = 77; + // We will spill (79-3)+1 registers to the stack SmallVector<SDValue, 79-3+1> MemOps; // Create the frame slot - for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { FuncInfo->setVarArgsFrameIndex( - MFI->CreateFixedObject(StackSlotSize, ArgOffset, - true, false)); + MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); @@ -1135,6 +1155,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -1144,8 +1165,15 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); unsigned NumOps = Outs.size(); unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); + + const unsigned NumArgRegs = ArgLocs.size(); + // Handy pointer type EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1165,8 +1193,9 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // And the arguments passed on the stack SmallVector<SDValue, 8> MemOpChains; - for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + for (; ArgRegIdx != NumOps; ++ArgRegIdx) { + SDValue Arg = OutVals[ArgRegIdx]; + CCValAssign &VA = ArgLocs[ArgRegIdx]; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. 
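A note on the pattern these CellSPU hunks adopt: they retire the hand-maintained ArgRegs/NumArgRegs walk in favor of the CCState machinery the other in-tree targets already use. Reduced to a minimal sketch (not verbatim SPU code; Chain, dl, PtrOff, RegsToPass, and MemOpChains are the locals visible in the surrounding hunks, and CCC_SPU is the tablegen-generated assignment function introduced in SPUCallingConv.td above), the flow is:

  // Let the generated calling-convention code assign each outgoing value
  // to a register or a stack slot, instead of indexing a hand-written
  // register array.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);  // fills ArgLocs from the CC table

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];   // argument SDValues now arrive in OutVals
    if (VA.isRegLoc())          // register-assigned: pass in VA's register
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else                        // stack-assigned: store to the slot at PtrOff
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
                                         false, false, 0));
  }

The one place that still needs the raw R3-R79 register list is the varargs spill loop, which is why the static ArgRegs array above survives behind its FIXME.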
@@ -1180,24 +1209,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case MVT::i32: case MVT::i64: case MVT::i128: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; case MVT::f32: case MVT::f64: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; case MVT::v2i64: case MVT::v2f64: case MVT::v4f32: @@ -1205,7 +1218,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case MVT::v8i16: case MVT::v16i8: if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, false, false, 0)); @@ -1249,7 +1262,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const GlobalValue *GV = G->getGlobal(); EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); if (!ST->usingLargeMem()) { // Turn calls to targets that are defined (i.e., have bodies) into BRSL @@ -1355,6 +1368,7 @@ SDValue SPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; @@ -1376,7 +1390,7 @@ SPUTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); Flag = Chain.getValue(1); } @@ -1746,15 +1760,20 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned V0Elt = 0; bool monotonic = true; bool rotate = true; + EVT maskVT; // which of the c?d instructions to use if (EltVT == MVT::i8) { V2EltIdx0 = 16; + maskVT = MVT::v16i8; } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; + maskVT = MVT::v8i16; } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { V2EltIdx0 = 4; + maskVT = MVT::v4i32; } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { V2EltIdx0 = 2; + maskVT = MVT::v2i64; } else llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); @@ -1786,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } else { rotate = false; } - } else if (PrevElt == 0) { + } else if (i == 0) { // First time through, need to keep track of previous element PrevElt = SrcElt; } else { @@ -1798,18 +1817,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (EltsFromV2 == 1 && monotonic) { // Compute mask and shuffle - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Initialize temporary register to 0 - SDValue InitTempReg = - DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT)); - // Copy register's contents as index in SHUFFLE_MASK: - SDValue ShufMaskOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, dl, 
MVT::v4i32, - DAG.getTargetConstant(V2Elt, MVT::i32), - DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT)); + + // As SHUFFLE_MASK becomes a c?d instruction, feed it an address + // R1 ($sp) is used here only as it is guaranteed to have last bits zero + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(V2Elt, MVT::i32)); + SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, + maskVT, Pointer); + // Use shuffle mask in SHUFB synthetic instruction: return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, ShufMaskOp); @@ -2056,14 +2073,19 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); - ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); - assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + // use 0 when the lane to insert to is 'undef' + int64_t Idx=0; + if (IdxOp.getOpcode() != ISD::UNDEF) { + ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); + assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + Idx = (CN->getSExtValue()); + } EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(CN->getSExtValue(), PtrVT)); + DAG.getConstant(Idx, PtrVT)); SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); SDValue result = @@ -2862,7 +2884,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const case SPUISD::IndirectAddr: { if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (CN != 0 && CN->getZExtValue() == 0) { + if (CN != 0 && CN->isNullValue()) { // (SPUindirect (SPUaform <addr>, 0), 0) -> // (SPUaform <addr>, 0) @@ -3056,12 +3078,10 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, void SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { // Default, for the time being, to the base class handler - TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory, - Ops, DAG); + TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); } /// isLegalAddressImmediate - Return true if the integer value can be used diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 9ebd442..6d3c90b 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -134,7 +134,6 @@ namespace llvm { EVT VT) const; void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -160,6 +159,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -168,6 +168,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 4c53c98..69aa088 100644 --- 
a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -164,11 +164,9 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && "invalid SPU OR<type>_<vec> or LR instruction!"); - if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { sourceReg = MI.getOperand(1).getReg(); destReg = MI.getOperand(0).getReg(); return true; - } break; } case SPU::ORv16i8: @@ -251,40 +249,18 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const +void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { // We support cross register class moves for our aliases, such as R3 in any // reg class to any other reg class containing R3. This is required because // we instruction select bitconvert i64 -> f64 as a noop for example, so our // types have no specific meaning. - if (DestRC == SPU::R8CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R16CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R32CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R32FPRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R64CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R64FPRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::GPRCRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::VECREGRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg); - } else { - // Attempt to copy unknown/unsupported register class! - return false; - } - - return true; + BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } void @@ -356,88 +332,6 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); } -//! Return true if the specified load or store can be folded -bool -SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - // Make sure this is a reg-reg copy. - unsigned Opc = MI->getOpcode(); - - switch (Opc) { - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORf32: - case SPU::ORf64: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) - return true; - break; - } - - return false; -} - -/// foldMemoryOperand - SPU, like PPC, can only fold spills into -/// copy instructions, turning them into load/store instructions. 
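Because every SPU register class aliases the same 128-bit physical registers, the copyPhysReg rewrite above can emit a single LRr128 for any copy and drop the per-class opcode dispatch entirely. A hedged before/after sketch of a call site (TII, MBB, MI and the register variables are assumed):

    // Old contract: copyRegToReg returned false for unsupported
    // class pairs, so callers had to check.
    //   if (!TII->copyRegToReg(MBB, MI, DestReg, SrcReg,
    //                          DestRC, SrcRC, DL))
    //     report_fatal_error("impossible reg-to-reg copy");

    // New contract: copyPhysReg must always succeed, returns void,
    // and takes the source kill state instead of register classes.
    TII->copyPhysReg(MBB, MI, DL, DestReg, SrcReg, /*KillSrc=*/true);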
-MachineInstr * -SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const -{ - if (Ops.size() != 1) return 0; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = 0; - - switch (Opc) { - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORf32: - case SPU::ORf64: - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (FrameIndex < SPUFrameInfo::maxFrameOffset()) { - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), - get(SPU::STQDr32)); - - MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)); - NewMI = addFrameReference(MIB, FrameIndex); - } - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)); - - MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)); - Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) - ? SPU::STQDr32 : SPU::STQXr32; - NewMI = addFrameReference(MIB, FrameIndex); - break; - } - } - - return NewMI; -} - //! Branch analysis /*! \note This code was kiped from PPC. There may be more branch analysis for @@ -554,9 +448,8 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -566,14 +459,14 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (FBB == 0) { if (Cond.empty()) { // Unconditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR)); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR)); MIB.addMBB(TBB); DEBUG(errs() << "Inserted one-way uncond branch: "); DEBUG((*MIB).dump()); } else { // Conditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); MIB.addReg(Cond[1].getReg()).addMBB(TBB); DEBUG(errs() << "Inserted one-way cond branch: "); @@ -581,8 +474,8 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } return 1; } else { - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); - MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR)); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); // Two-way Conditional Branch. 
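The InsertBranch hunk resolves the removed "FIXME this should probably have a DebugLoc argument" by threading the location in from the caller instead of default-constructing it inside the hook. A sketch of the updated call, with the caller's context assumed:

    // Synthesized BR/conditional branches now keep the source
    // position of the branch they replace.
    unsigned NumInserted =
        TII->InsertBranch(MBB, TBB, FBB, Cond, OldBranch->getDebugLoc());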
MIB.addReg(Cond[1].getReg()).addMBB(TBB); diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 6dabd7c..fbb1733 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -23,19 +23,6 @@ namespace llvm { class SPUInstrInfo : public TargetInstrInfoImpl { SPUTargetMachine &TM; const SPURegisterInfo RI; - protected: - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - public: explicit SPUInstrInfo(SPUTargetMachine &tm); @@ -56,12 +43,10 @@ namespace llvm { unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; //! Store a register to a stack slot, based on its register class. virtual void storeRegToStackSlot(MachineBasicBlock &MBB, @@ -77,11 +62,6 @@ namespace llvm { const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - //! Return true if the specified load or store can be folded - virtual - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - //! Reverses a branch's condition, returning false on success. virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -94,8 +74,9 @@ namespace llvm { virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; }; } diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 846c7ed..647da30 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -21,7 +21,7 @@ def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, [SDNPHasChain, SDNPOutFlag]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; //===----------------------------------------------------------------------===// // Operand constraints: //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index d8937ec..f7cfa42 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -191,33 +191,6 @@ SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, { } -// SPU's 128-bit registers used for argument passing: -static const unsigned SPU_ArgRegs[] = { - SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, - SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, - SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, - SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, - SPU::R31, SPU::R32, SPU::R33, 
SPU::R34, SPU::R35, SPU::R36, SPU::R37, - SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, - SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, - SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, - SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, - SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, - SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 -}; - -const unsigned * -SPURegisterInfo::getArgRegs() -{ - return SPU_ArgRegs; -} - -unsigned -SPURegisterInfo::getNumArgRegs() -{ - return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]); -} - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * @@ -251,36 +224,6 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const return SPU_CalleeSaveRegs; } -const TargetRegisterClass* const* -SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const -{ - // Cell ABI Calling Convention - static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = { - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, /* environment pointer */ - &SPU::GPRCRegClass, /* stack pointer */ - &SPU::GPRCRegClass, /* link register */ - 0 /* end */ - }; - - return SPU_CalleeSaveRegClasses; -} - /*! R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is generally unused) are the Cell's reserved registers diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 0a70318..7a6ae6d 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -49,10 +49,6 @@ namespace llvm { //! Return the array of callee-saved registers virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const; - //! Return the register class array of the callee-saved registers - virtual const TargetRegisterClass* const * - getCalleeSavedRegClasses(const MachineFunction *MF) const; - //! Allow for scavenging, so we can get scratch registers when needed. virtual bool requiresRegisterScavenging(const MachineFunction &MF) const { return true; } @@ -90,15 +86,6 @@ namespace llvm { // New methods added: //------------------------------------------------------------------------ - //! Return the array of argument passing registers - /*! - \note The size of this array is returned by getArgRegsSize(). - */ - static const unsigned *getArgRegs(); - - //! 
Return the size of the argument passing register array - static unsigned getNumArgRegs(); - //! Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 45a0c84..145568a 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -99,11 +99,12 @@ namespace { ValueSet DefinedValues; ForwardRefMap ForwardRefs; bool is_inline; + unsigned indent_level; public: static char ID; explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {} + ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} virtual const char *getPassName() const { return "C++ backend"; } @@ -120,6 +121,11 @@ namespace { void error(const std::string& msg); + + formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0); + inline void in() { indent_level++; } + inline void out() { if (indent_level >0) indent_level--; } + private: void printLinkageType(GlobalValue::LinkageTypes LT); void printVisibilityType(GlobalValue::VisibilityTypes VisTypes); @@ -153,1857 +159,1856 @@ namespace { void printModuleBody(); }; +} // end anonymous namespace. + +formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) { + Out << '\n'; + if (delta >= 0 || indent_level >= unsigned(-delta)) + indent_level += delta; + Out.indent(indent_level); + return Out; +} + +static inline void sanitize(std::string &str) { + for (size_t i = 0; i < str.length(); ++i) + if (!isalnum(str[i]) && str[i] != '_') + str[i] = '_'; +} - static unsigned indent_level = 0; - inline formatted_raw_ostream& nl(formatted_raw_ostream& Out, int delta = 0) { - Out << "\n"; - if (delta >= 0 || indent_level >= unsigned(-delta)) - indent_level += delta; - for (unsigned i = 0; i < indent_level; ++i) - Out << " "; - return Out; +static std::string getTypePrefix(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "void_"; + case Type::IntegerTyID: + return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_"; + case Type::FloatTyID: return "float_"; + case Type::DoubleTyID: return "double_"; + case Type::LabelTyID: return "label_"; + case Type::FunctionTyID: return "func_"; + case Type::StructTyID: return "struct_"; + case Type::ArrayTyID: return "array_"; + case Type::PointerTyID: return "ptr_"; + case Type::VectorTyID: return "packed_"; + case Type::OpaqueTyID: return "opaque_"; + default: return "other_"; } + return "unknown_"; +} - inline void in() { indent_level++; } - inline void out() { if (indent_level >0) indent_level--; } +// Looks up the type in the symbol table and returns a pointer to its name or +// a null pointer if it wasn't found. Note that this isn't the same as the +// Mode::getTypeName function which will return an empty string, not a null +// pointer if the name is not found. 
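With indent_level promoted to a CppWriter member, nl()/in()/out() replace the old file-static indentation state, and nl() now uses formatted_raw_ostream::indent() instead of a space-printing loop. A small usage sketch, mirroring how printAttributes drives the helpers later in this diff:

    // Emit a brace-delimited block: in() bumps the indent before the
    // body and out() restores it before the closing brace.
    Out << '{'; in(); nl(Out);
    Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
    out(); nl(Out);
    Out << '}'; nl(Out);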
+static const std::string * +findTypeName(const TypeSymbolTable& ST, const Type* Ty) { + TypeSymbolTable::const_iterator TI = ST.begin(); + TypeSymbolTable::const_iterator TE = ST.end(); + for (;TI != TE; ++TI) + if (TI->second == Ty) + return &(TI->first); + return 0; +} - inline void - sanitize(std::string& str) { - for (size_t i = 0; i < str.length(); ++i) - if (!isalnum(str[i]) && str[i] != '_') - str[i] = '_'; - } +void CppWriter::error(const std::string& msg) { + report_fatal_error(msg); +} - inline std::string - getTypePrefix(const Type* Ty ) { - switch (Ty->getTypeID()) { - case Type::VoidTyID: return "void_"; - case Type::IntegerTyID: - return std::string("int") + utostr(cast<IntegerType>(Ty)->getBitWidth()) + - "_"; - case Type::FloatTyID: return "float_"; - case Type::DoubleTyID: return "double_"; - case Type::LabelTyID: return "label_"; - case Type::FunctionTyID: return "func_"; - case Type::StructTyID: return "struct_"; - case Type::ArrayTyID: return "array_"; - case Type::PointerTyID: return "ptr_"; - case Type::VectorTyID: return "packed_"; - case Type::OpaqueTyID: return "opaque_"; - default: return "other_"; - } - return "unknown_"; - } - - // Looks up the type in the symbol table and returns a pointer to its name or - // a null pointer if it wasn't found. Note that this isn't the same as the - // Mode::getTypeName function which will return an empty string, not a null - // pointer if the name is not found. - inline const std::string* - findTypeName(const TypeSymbolTable& ST, const Type* Ty) { - TypeSymbolTable::const_iterator TI = ST.begin(); - TypeSymbolTable::const_iterator TE = ST.end(); - for (;TI != TE; ++TI) - if (TI->second == Ty) - return &(TI->first); - return 0; - } - - void CppWriter::error(const std::string& msg) { - report_fatal_error(msg); - } - - // printCFP - Print a floating point constant .. very carefully :) - // This makes sure that conversion to/from floating yields the same binary - // result so that we don't lose precision. - void CppWriter::printCFP(const ConstantFP *CFP) { - bool ignored; - APFloat APF = APFloat(CFP->getValueAPF()); // copy - if (CFP->getType() == Type::getFloatTy(CFP->getContext())) - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get(mod->getContext(), "; - Out << "APFloat("; +// printCFP - Print a floating point constant .. very carefully :) +// This makes sure that conversion to/from floating yields the same binary +// result so that we don't lose precision. 
+void CppWriter::printCFP(const ConstantFP *CFP) { + bool ignored; + APFloat APF = APFloat(CFP->getValueAPF()); // copy + if (CFP->getType() == Type::getFloatTy(CFP->getContext())) + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); + Out << "ConstantFP::get(mod->getContext(), "; + Out << "APFloat("; #if HAVE_PRINTF_A - char Buffer[100]; - sprintf(Buffer, "%A", APF.convertToDouble()); - if ((!strncmp(Buffer, "0x", 2) || - !strncmp(Buffer, "-0x", 3) || - !strncmp(Buffer, "+0x", 3)) && - APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { - if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << "BitsToDouble(" << Buffer << ")"; - else - Out << "BitsToFloat((float)" << Buffer << ")"; - Out << ")"; - } else { + char Buffer[100]; + sprintf(Buffer, "%A", APF.convertToDouble()); + if ((!strncmp(Buffer, "0x", 2) || + !strncmp(Buffer, "-0x", 3) || + !strncmp(Buffer, "+0x", 3)) && + APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << "BitsToDouble(" << Buffer << ")"; + else + Out << "BitsToFloat((float)" << Buffer << ")"; + Out << ")"; + } else { #endif - std::string StrVal = ftostr(CFP->getValueAPF()); - - while (StrVal[0] == ' ') - StrVal.erase(StrVal.begin()); - - // Check to make sure that the stringized number is not some string like - // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex. - if (((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) && - (CFP->isExactlyValue(atof(StrVal.c_str())))) { - if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << StrVal; - else - Out << StrVal << "f"; - } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << "BitsToDouble(0x" - << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) - << "ULL) /* " << StrVal << " */"; + std::string StrVal = ftostr(CFP->getValueAPF()); + + while (StrVal[0] == ' ') + StrVal.erase(StrVal.begin()); + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex. + if (((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + (CFP->isExactlyValue(atof(StrVal.c_str())))) { + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << StrVal; else - Out << "BitsToFloat(0x" - << utohexstr((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue()) - << "U) /* " << StrVal << " */"; - Out << ")"; + Out << StrVal << "f"; + } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << "BitsToDouble(0x" + << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) + << "ULL) /* " << StrVal << " */"; + else + Out << "BitsToFloat(0x" + << utohexstr((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue()) + << "U) /* " << StrVal << " */"; + Out << ")"; #if HAVE_PRINTF_A - } + } #endif - Out << ")"; + Out << ")"; +} + +void CppWriter::printCallingConv(CallingConv::ID cc){ + // Print the calling convention. + switch (cc) { + case CallingConv::C: Out << "CallingConv::C"; break; + case CallingConv::Fast: Out << "CallingConv::Fast"; break; + case CallingConv::Cold: Out << "CallingConv::Cold"; break; + case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; + default: Out << cc; break; } +} - void CppWriter::printCallingConv(CallingConv::ID cc){ - // Print the calling convention. 
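printCFP emits a plain decimal literal only when the stringized value parses back to the identical bits; anything that fails the round-trip check (NaNs, for instance) falls back to the raw bit pattern. A hedged illustration of the fallback output, using a hypothetical quiet-NaN float:

    // Generated C++ for a value whose decimal form does not round-trip:
    //   ConstantFP::get(mod->getContext(),
    //                   APFloat(BitsToFloat(0x7FC00000U) /* NaN */))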
- switch (cc) { - case CallingConv::C: Out << "CallingConv::C"; break; - case CallingConv::Fast: Out << "CallingConv::Fast"; break; - case CallingConv::Cold: Out << "CallingConv::Cold"; break; - case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; - default: Out << cc; break; - } +void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { + switch (LT) { + case GlobalValue::InternalLinkage: + Out << "GlobalValue::InternalLinkage"; break; + case GlobalValue::PrivateLinkage: + Out << "GlobalValue::PrivateLinkage"; break; + case GlobalValue::LinkerPrivateLinkage: + Out << "GlobalValue::LinkerPrivateLinkage"; break; + case GlobalValue::LinkerPrivateWeakLinkage: + Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; + case GlobalValue::AvailableExternallyLinkage: + Out << "GlobalValue::AvailableExternallyLinkage "; break; + case GlobalValue::LinkOnceAnyLinkage: + Out << "GlobalValue::LinkOnceAnyLinkage "; break; + case GlobalValue::LinkOnceODRLinkage: + Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::WeakAnyLinkage: + Out << "GlobalValue::WeakAnyLinkage"; break; + case GlobalValue::WeakODRLinkage: + Out << "GlobalValue::WeakODRLinkage"; break; + case GlobalValue::AppendingLinkage: + Out << "GlobalValue::AppendingLinkage"; break; + case GlobalValue::ExternalLinkage: + Out << "GlobalValue::ExternalLinkage"; break; + case GlobalValue::DLLImportLinkage: + Out << "GlobalValue::DLLImportLinkage"; break; + case GlobalValue::DLLExportLinkage: + Out << "GlobalValue::DLLExportLinkage"; break; + case GlobalValue::ExternalWeakLinkage: + Out << "GlobalValue::ExternalWeakLinkage"; break; + case GlobalValue::CommonLinkage: + Out << "GlobalValue::CommonLinkage"; break; } +} - void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { - switch (LT) { - case GlobalValue::InternalLinkage: - Out << "GlobalValue::InternalLinkage"; break; - case GlobalValue::PrivateLinkage: - Out << "GlobalValue::PrivateLinkage"; break; - case GlobalValue::LinkerPrivateLinkage: - Out << "GlobalValue::LinkerPrivateLinkage"; break; - case GlobalValue::AvailableExternallyLinkage: - Out << "GlobalValue::AvailableExternallyLinkage "; break; - case GlobalValue::LinkOnceAnyLinkage: - Out << "GlobalValue::LinkOnceAnyLinkage "; break; - case GlobalValue::LinkOnceODRLinkage: - Out << "GlobalValue::LinkOnceODRLinkage "; break; - case GlobalValue::WeakAnyLinkage: - Out << "GlobalValue::WeakAnyLinkage"; break; - case GlobalValue::WeakODRLinkage: - Out << "GlobalValue::WeakODRLinkage"; break; - case GlobalValue::AppendingLinkage: - Out << "GlobalValue::AppendingLinkage"; break; - case GlobalValue::ExternalLinkage: - Out << "GlobalValue::ExternalLinkage"; break; - case GlobalValue::DLLImportLinkage: - Out << "GlobalValue::DLLImportLinkage"; break; - case GlobalValue::DLLExportLinkage: - Out << "GlobalValue::DLLExportLinkage"; break; - case GlobalValue::ExternalWeakLinkage: - Out << "GlobalValue::ExternalWeakLinkage"; break; - case GlobalValue::CommonLinkage: - Out << "GlobalValue::CommonLinkage"; break; - } +void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { + switch (VisType) { + default: llvm_unreachable("Unknown GVar visibility"); + case GlobalValue::DefaultVisibility: + Out << "GlobalValue::DefaultVisibility"; + break; + case GlobalValue::HiddenVisibility: + Out << "GlobalValue::HiddenVisibility"; + break; + case GlobalValue::ProtectedVisibility: + Out << "GlobalValue::ProtectedVisibility"; + break; } +} - void 
CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { - switch (VisType) { - default: llvm_unreachable("Unknown GVar visibility"); - case GlobalValue::DefaultVisibility: - Out << "GlobalValue::DefaultVisibility"; - break; - case GlobalValue::HiddenVisibility: - Out << "GlobalValue::HiddenVisibility"; - break; - case GlobalValue::ProtectedVisibility: - Out << "GlobalValue::ProtectedVisibility"; - break; +// printEscapedString - Print each character of the specified string, escaping +// it if it is not printable or if it is an escape char. +void CppWriter::printEscapedString(const std::string &Str) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + unsigned char C = Str[i]; + if (isprint(C) && C != '"' && C != '\\') { + Out << C; + } else { + Out << "\\x" + << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) + << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); } } +} - // printEscapedString - Print each character of the specified string, escaping - // it if it is not printable or if it is an escape char. - void CppWriter::printEscapedString(const std::string &Str) { - for (unsigned i = 0, e = Str.size(); i != e; ++i) { - unsigned char C = Str[i]; - if (isprint(C) && C != '"' && C != '\\') { - Out << C; - } else { - Out << "\\x" - << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) - << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); - } +std::string CppWriter::getCppName(const Type* Ty) { + // First, handle the primitive types .. easy + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())"; + case Type::IntegerTyID: { + unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth(); + return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")"; + } + case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())"; + case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; + case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; + case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; + default: + error("Invalid primitive type"); + break; } + // shouldn't be returned, but make it sensible + return "Type::getVoidTy(mod->getContext())"; } - std::string CppWriter::getCppName(const Type* Ty) { - // First, handle the primitive types .. easy - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { - switch (Ty->getTypeID()) { - case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())"; - case Type::IntegerTyID: { - unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth(); - return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")"; - } - case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())"; - case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; - case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; - case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; - default: - error("Invalid primitive type"); - break; - } - // shouldn't be returned, but make it sensible - return "Type::getVoidTy(mod->getContext())"; - } + // Now, see if we've seen the type before and return that + TypeMap::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) + return I->second; + + // Okay, let's build a new name for this type. 
Start with a prefix + const char* prefix = 0; + switch (Ty->getTypeID()) { + case Type::FunctionTyID: prefix = "FuncTy_"; break; + case Type::StructTyID: prefix = "StructTy_"; break; + case Type::ArrayTyID: prefix = "ArrayTy_"; break; + case Type::PointerTyID: prefix = "PointerTy_"; break; + case Type::OpaqueTyID: prefix = "OpaqueTy_"; break; + case Type::VectorTyID: prefix = "VectorTy_"; break; + default: prefix = "OtherTy_"; break; // prevent breakage + } - // Now, see if we've seen the type before and return that - TypeMap::iterator I = TypeNames.find(Ty); - if (I != TypeNames.end()) - return I->second; + // See if the type has a name in the symboltable and build accordingly + const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty); + std::string name; + if (tName) + name = std::string(prefix) + *tName; + else + name = std::string(prefix) + utostr(uniqueNum++); + sanitize(name); + + // Save the name + return TypeNames[Ty] = name; +} - // Okay, let's build a new name for this type. Start with a prefix - const char* prefix = 0; - switch (Ty->getTypeID()) { - case Type::FunctionTyID: prefix = "FuncTy_"; break; - case Type::StructTyID: prefix = "StructTy_"; break; - case Type::ArrayTyID: prefix = "ArrayTy_"; break; - case Type::PointerTyID: prefix = "PointerTy_"; break; - case Type::OpaqueTyID: prefix = "OpaqueTy_"; break; - case Type::VectorTyID: prefix = "VectorTy_"; break; - default: prefix = "OtherTy_"; break; // prevent breakage - } +void CppWriter::printCppName(const Type* Ty) { + printEscapedString(getCppName(Ty)); +} - // See if the type has a name in the symboltable and build accordingly - const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty); - std::string name; - if (tName) - name = std::string(prefix) + *tName; - else - name = std::string(prefix) + utostr(uniqueNum++); - sanitize(name); - - // Save the name - return TypeNames[Ty] = name; - } - - void CppWriter::printCppName(const Type* Ty) { - printEscapedString(getCppName(Ty)); - } - - std::string CppWriter::getCppName(const Value* val) { - std::string name; - ValueMap::iterator I = ValueNames.find(val); - if (I != ValueNames.end() && I->first == val) - return I->second; - - if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) { - name = std::string("gvar_") + - getTypePrefix(GV->getType()->getElementType()); - } else if (isa<Function>(val)) { - name = std::string("func_"); - } else if (const Constant* C = dyn_cast<Constant>(val)) { - name = std::string("const_") + getTypePrefix(C->getType()); - } else if (const Argument* Arg = dyn_cast<Argument>(val)) { - if (is_inline) { - unsigned argNum = std::distance(Arg->getParent()->arg_begin(), - Function::const_arg_iterator(Arg)) + 1; - name = std::string("arg_") + utostr(argNum); - NameSet::iterator NI = UsedNames.find(name); - if (NI != UsedNames.end()) - name += std::string("_") + utostr(uniqueNum++); - UsedNames.insert(name); - return ValueNames[val] = name; - } else { - name = getTypePrefix(val->getType()); - } +std::string CppWriter::getCppName(const Value* val) { + std::string name; + ValueMap::iterator I = ValueNames.find(val); + if (I != ValueNames.end() && I->first == val) + return I->second; + + if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) { + name = std::string("gvar_") + + getTypePrefix(GV->getType()->getElementType()); + } else if (isa<Function>(val)) { + name = std::string("func_"); + } else if (const Constant* C = dyn_cast<Constant>(val)) { + name = std::string("const_") + 
getTypePrefix(C->getType()); + } else if (const Argument* Arg = dyn_cast<Argument>(val)) { + if (is_inline) { + unsigned argNum = std::distance(Arg->getParent()->arg_begin(), + Function::const_arg_iterator(Arg)) + 1; + name = std::string("arg_") + utostr(argNum); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + return ValueNames[val] = name; } else { name = getTypePrefix(val->getType()); } - if (val->hasName()) - name += val->getName(); - else - name += utostr(uniqueNum++); - sanitize(name); - NameSet::iterator NI = UsedNames.find(name); - if (NI != UsedNames.end()) - name += std::string("_") + utostr(uniqueNum++); - UsedNames.insert(name); - return ValueNames[val] = name; + } else { + name = getTypePrefix(val->getType()); } + if (val->hasName()) + name += val->getName(); + else + name += utostr(uniqueNum++); + sanitize(name); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + return ValueNames[val] = name; +} - void CppWriter::printCppName(const Value* val) { - printEscapedString(getCppName(val)); - } +void CppWriter::printCppName(const Value* val) { + printEscapedString(getCppName(val)); +} - void CppWriter::printAttributes(const AttrListPtr &PAL, - const std::string &name) { - Out << "AttrListPtr " << name << "_PAL;"; - nl(Out); - if (!PAL.isEmpty()) { - Out << '{'; in(); nl(Out); - Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out); - Out << "AttributeWithIndex PAWI;"; nl(Out); - for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { - unsigned index = PAL.getSlot(i).Index; - Attributes attrs = PAL.getSlot(i).Attrs; - Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 "; +void CppWriter::printAttributes(const AttrListPtr &PAL, + const std::string &name) { + Out << "AttrListPtr " << name << "_PAL;"; + nl(Out); + if (!PAL.isEmpty()) { + Out << '{'; in(); nl(Out); + Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out); + Out << "AttributeWithIndex PAWI;"; nl(Out); + for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { + unsigned index = PAL.getSlot(i).Index; + Attributes attrs = PAL.getSlot(i).Attrs; + Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 "; #define HANDLE_ATTR(X) \ - if (attrs & Attribute::X) \ - Out << " | Attribute::" #X; \ - attrs &= ~Attribute::X; - - HANDLE_ATTR(SExt); - HANDLE_ATTR(ZExt); - HANDLE_ATTR(NoReturn); - HANDLE_ATTR(InReg); - HANDLE_ATTR(StructRet); - HANDLE_ATTR(NoUnwind); - HANDLE_ATTR(NoAlias); - HANDLE_ATTR(ByVal); - HANDLE_ATTR(Nest); - HANDLE_ATTR(ReadNone); - HANDLE_ATTR(ReadOnly); - HANDLE_ATTR(InlineHint); - HANDLE_ATTR(NoInline); - HANDLE_ATTR(AlwaysInline); - HANDLE_ATTR(OptimizeForSize); - HANDLE_ATTR(StackProtect); - HANDLE_ATTR(StackProtectReq); - HANDLE_ATTR(NoCapture); + if (attrs & Attribute::X) \ + Out << " | Attribute::" #X; \ + attrs &= ~Attribute::X; + + HANDLE_ATTR(SExt); + HANDLE_ATTR(ZExt); + HANDLE_ATTR(NoReturn); + HANDLE_ATTR(InReg); + HANDLE_ATTR(StructRet); + HANDLE_ATTR(NoUnwind); + HANDLE_ATTR(NoAlias); + HANDLE_ATTR(ByVal); + HANDLE_ATTR(Nest); + HANDLE_ATTR(ReadNone); + HANDLE_ATTR(ReadOnly); + HANDLE_ATTR(InlineHint); + HANDLE_ATTR(NoInline); + HANDLE_ATTR(AlwaysInline); + HANDLE_ATTR(OptimizeForSize); + HANDLE_ATTR(StackProtect); + HANDLE_ATTR(StackProtectReq); + HANDLE_ATTR(NoCapture); #undef HANDLE_ATTR - assert(attrs == 0 && "Unhandled attribute!"); - Out << ";"; - nl(Out); - Out << 
"Attrs.push_back(PAWI);"; - nl(Out); - } - Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());"; + assert(attrs == 0 && "Unhandled attribute!"); + Out << ";"; + nl(Out); + Out << "Attrs.push_back(PAWI);"; nl(Out); - out(); nl(Out); - Out << '}'; nl(Out); } + Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());"; + nl(Out); + out(); nl(Out); + Out << '}'; nl(Out); } +} - bool CppWriter::printTypeInternal(const Type* Ty) { - // We don't print definitions for primitive types - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) - return false; - - // If we already defined this type, we don't need to define it again. - if (DefinedTypes.find(Ty) != DefinedTypes.end()) - return false; - - // Everything below needs the name for the type so get it now. - std::string typeName(getCppName(Ty)); - - // Search the type stack for recursion. If we find it, then generate this - // as an OpaqueType, but make sure not to do this multiple times because - // the type could appear in multiple places on the stack. Once the opaque - // definition is issued, it must not be re-issued. Consequently we have to - // check the UnresolvedTypes list as well. - TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(), - Ty); - if (TI != TypeStack.end()) { - TypeMap::const_iterator I = UnresolvedTypes.find(Ty); - if (I == UnresolvedTypes.end()) { - Out << "PATypeHolder " << typeName; - Out << "_fwd = OpaqueType::get(mod->getContext());"; - nl(Out); - UnresolvedTypes[Ty] = typeName; - } - return true; - } +bool CppWriter::printTypeInternal(const Type* Ty) { + // We don't print definitions for primitive types + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) + return false; - // We're going to print a derived type which, by definition, contains other - // types. So, push this one we're printing onto the type stack to assist with - // recursive definitions. - TypeStack.push_back(Ty); + // If we already defined this type, we don't need to define it again. + if (DefinedTypes.find(Ty) != DefinedTypes.end()) + return false; - // Print the type definition - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - const FunctionType* FT = cast<FunctionType>(Ty); - Out << "std::vector<const Type*>" << typeName << "_args;"; + // Everything below needs the name for the type so get it now. + std::string typeName(getCppName(Ty)); + + // Search the type stack for recursion. If we find it, then generate this + // as an OpaqueType, but make sure not to do this multiple times because + // the type could appear in multiple places on the stack. Once the opaque + // definition is issued, it must not be re-issued. Consequently we have to + // check the UnresolvedTypes list as well. 
+ TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(), + Ty); + if (TI != TypeStack.end()) { + TypeMap::const_iterator I = UnresolvedTypes.find(Ty); + if (I == UnresolvedTypes.end()) { + Out << "PATypeHolder " << typeName; + Out << "_fwd = OpaqueType::get(mod->getContext());"; nl(Out); - FunctionType::param_iterator PI = FT->param_begin(); - FunctionType::param_iterator PE = FT->param_end(); - for (; PI != PE; ++PI) { - const Type* argTy = static_cast<const Type*>(*PI); - bool isForward = printTypeInternal(argTy); - std::string argName(getCppName(argTy)); - Out << typeName << "_args.push_back(" << argName; - if (isForward) - Out << "_fwd"; - Out << ");"; - nl(Out); - } - bool isForward = printTypeInternal(FT->getReturnType()); - std::string retTypeName(getCppName(FT->getReturnType())); - Out << "FunctionType* " << typeName << " = FunctionType::get("; - in(); nl(Out) << "/*Result=*/" << retTypeName; + UnresolvedTypes[Ty] = typeName; + } + return true; + } + + // We're going to print a derived type which, by definition, contains other + // types. So, push this one we're printing onto the type stack to assist with + // recursive definitions. + TypeStack.push_back(Ty); + + // Print the type definition + switch (Ty->getTypeID()) { + case Type::FunctionTyID: { + const FunctionType* FT = cast<FunctionType>(Ty); + Out << "std::vector<const Type*>" << typeName << "_args;"; + nl(Out); + FunctionType::param_iterator PI = FT->param_begin(); + FunctionType::param_iterator PE = FT->param_end(); + for (; PI != PE; ++PI) { + const Type* argTy = static_cast<const Type*>(*PI); + bool isForward = printTypeInternal(argTy); + std::string argName(getCppName(argTy)); + Out << typeName << "_args.push_back(" << argName; if (isForward) Out << "_fwd"; - Out << ","; - nl(Out) << "/*Params=*/" << typeName << "_args,"; - nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");"; - out(); - nl(Out); - break; - } - case Type::StructTyID: { - const StructType* ST = cast<StructType>(Ty); - Out << "std::vector<const Type*>" << typeName << "_fields;"; - nl(Out); - StructType::element_iterator EI = ST->element_begin(); - StructType::element_iterator EE = ST->element_end(); - for (; EI != EE; ++EI) { - const Type* fieldTy = static_cast<const Type*>(*EI); - bool isForward = printTypeInternal(fieldTy); - std::string fieldName(getCppName(fieldTy)); - Out << typeName << "_fields.push_back(" << fieldName; - if (isForward) - Out << "_fwd"; - Out << ");"; - nl(Out); - } - Out << "StructType* " << typeName << " = StructType::get(" - << "mod->getContext(), " - << typeName << "_fields, /*isPacked=*/" - << (ST->isPacked() ? "true" : "false") << ");"; - nl(Out); - break; - } - case Type::ArrayTyID: { - const ArrayType* AT = cast<ArrayType>(Ty); - const Type* ET = AT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "ArrayType* " << typeName << " = ArrayType::get(" - << elemName << (isForward ? "_fwd" : "") - << ", " << utostr(AT->getNumElements()) << ");"; - nl(Out); - break; - } - case Type::PointerTyID: { - const PointerType* PT = cast<PointerType>(Ty); - const Type* ET = PT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "PointerType* " << typeName << " = PointerType::get(" - << elemName << (isForward ? 
"_fwd" : "") - << ", " << utostr(PT->getAddressSpace()) << ");"; - nl(Out); - break; - } - case Type::VectorTyID: { - const VectorType* PT = cast<VectorType>(Ty); - const Type* ET = PT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "VectorType* " << typeName << " = VectorType::get(" - << elemName << (isForward ? "_fwd" : "") - << ", " << utostr(PT->getNumElements()) << ");"; - nl(Out); - break; - } - case Type::OpaqueTyID: { - Out << "OpaqueType* " << typeName; - Out << " = OpaqueType::get(mod->getContext());"; + Out << ");"; nl(Out); - break; - } - default: - error("Invalid TypeID"); } - - // If the type had a name, make sure we recreate it. - const std::string* progTypeName = - findTypeName(TheModule->getTypeSymbolTable(),Ty); - if (progTypeName) { - Out << "mod->addTypeName(\"" << *progTypeName << "\", " - << typeName << ");"; + bool isForward = printTypeInternal(FT->getReturnType()); + std::string retTypeName(getCppName(FT->getReturnType())); + Out << "FunctionType* " << typeName << " = FunctionType::get("; + in(); nl(Out) << "/*Result=*/" << retTypeName; + if (isForward) + Out << "_fwd"; + Out << ","; + nl(Out) << "/*Params=*/" << typeName << "_args,"; + nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");"; + out(); + nl(Out); + break; + } + case Type::StructTyID: { + const StructType* ST = cast<StructType>(Ty); + Out << "std::vector<const Type*>" << typeName << "_fields;"; + nl(Out); + StructType::element_iterator EI = ST->element_begin(); + StructType::element_iterator EE = ST->element_end(); + for (; EI != EE; ++EI) { + const Type* fieldTy = static_cast<const Type*>(*EI); + bool isForward = printTypeInternal(fieldTy); + std::string fieldName(getCppName(fieldTy)); + Out << typeName << "_fields.push_back(" << fieldName; + if (isForward) + Out << "_fwd"; + Out << ");"; nl(Out); } + Out << "StructType* " << typeName << " = StructType::get(" + << "mod->getContext(), " + << typeName << "_fields, /*isPacked=*/" + << (ST->isPacked() ? "true" : "false") << ");"; + nl(Out); + break; + } + case Type::ArrayTyID: { + const ArrayType* AT = cast<ArrayType>(Ty); + const Type* ET = AT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "ArrayType* " << typeName << " = ArrayType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(AT->getNumElements()) << ");"; + nl(Out); + break; + } + case Type::PointerTyID: { + const PointerType* PT = cast<PointerType>(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "PointerType* " << typeName << " = PointerType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(PT->getAddressSpace()) << ");"; + nl(Out); + break; + } + case Type::VectorTyID: { + const VectorType* PT = cast<VectorType>(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "VectorType* " << typeName << " = VectorType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(PT->getNumElements()) << ");"; + nl(Out); + break; + } + case Type::OpaqueTyID: { + Out << "OpaqueType* " << typeName; + Out << " = OpaqueType::get(mod->getContext());"; + nl(Out); + break; + } + default: + error("Invalid TypeID"); + } - // Pop us off the type stack - TypeStack.pop_back(); + // If the type had a name, make sure we recreate it. 
+ const std::string* progTypeName = + findTypeName(TheModule->getTypeSymbolTable(),Ty); + if (progTypeName) { + Out << "mod->addTypeName(\"" << *progTypeName << "\", " + << typeName << ");"; + nl(Out); + } - // Indicate that this type is now defined. - DefinedTypes.insert(Ty); + // Pop us off the type stack + TypeStack.pop_back(); - // Early resolve as many unresolved types as possible. Search the unresolved - // types map for the type we just printed. Now that its definition is complete - // we can resolve any previous references to it. This prevents a cascade of - // unresolved types. - TypeMap::iterator I = UnresolvedTypes.find(Ty); - if (I != UnresolvedTypes.end()) { - Out << "cast<OpaqueType>(" << I->second - << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");"; - nl(Out); - Out << I->second << " = cast<"; - switch (Ty->getTypeID()) { - case Type::FunctionTyID: Out << "FunctionType"; break; - case Type::ArrayTyID: Out << "ArrayType"; break; - case Type::StructTyID: Out << "StructType"; break; - case Type::VectorTyID: Out << "VectorType"; break; - case Type::PointerTyID: Out << "PointerType"; break; - case Type::OpaqueTyID: Out << "OpaqueType"; break; - default: Out << "NoSuchDerivedType"; break; - } - Out << ">(" << I->second << "_fwd.get());"; - nl(Out); nl(Out); - UnresolvedTypes.erase(I); - } + // Indicate that this type is now defined. + DefinedTypes.insert(Ty); - // Finally, separate the type definition from other with a newline. + // Early resolve as many unresolved types as possible. Search the unresolved + // types map for the type we just printed. Now that its definition is complete + // we can resolve any previous references to it. This prevents a cascade of + // unresolved types. + TypeMap::iterator I = UnresolvedTypes.find(Ty); + if (I != UnresolvedTypes.end()) { + Out << "cast<OpaqueType>(" << I->second + << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");"; nl(Out); - - // We weren't a recursive type - return false; + Out << I->second << " = cast<"; + switch (Ty->getTypeID()) { + case Type::FunctionTyID: Out << "FunctionType"; break; + case Type::ArrayTyID: Out << "ArrayType"; break; + case Type::StructTyID: Out << "StructType"; break; + case Type::VectorTyID: Out << "VectorType"; break; + case Type::PointerTyID: Out << "PointerType"; break; + case Type::OpaqueTyID: Out << "OpaqueType"; break; + default: Out << "NoSuchDerivedType"; break; + } + Out << ">(" << I->second << "_fwd.get());"; + nl(Out); nl(Out); + UnresolvedTypes.erase(I); } - // Prints a type definition. Returns true if it could not resolve all the - // types in the definition but had to use a forward reference. 
- void CppWriter::printType(const Type* Ty) { - assert(TypeStack.empty()); - TypeStack.clear(); - printTypeInternal(Ty); - assert(TypeStack.empty()); - } - - void CppWriter::printTypes(const Module* M) { - // Walk the symbol table and print out all its types - const TypeSymbolTable& symtab = M->getTypeSymbolTable(); - for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end(); - TI != TE; ++TI) { - - // For primitive types and types already defined, just add a name - TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || - TNI != TypeNames.end()) { - Out << "mod->addTypeName(\""; - printEscapedString(TI->first); - Out << "\", " << getCppName(TI->second) << ");"; - nl(Out); - // For everything else, define the type - } else { - printType(TI->second); - } - } + // Finally, separate the type definition from other with a newline. + nl(Out); - // Add all of the global variables to the value table... - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - if (I->hasInitializer()) - printType(I->getInitializer()->getType()); - printType(I->getType()); + // We weren't a recursive type + return false; +} + +// Prints a type definition. Returns true if it could not resolve all the +// types in the definition but had to use a forward reference. +void CppWriter::printType(const Type* Ty) { + assert(TypeStack.empty()); + TypeStack.clear(); + printTypeInternal(Ty); + assert(TypeStack.empty()); +} + +void CppWriter::printTypes(const Module* M) { + // Walk the symbol table and print out all its types + const TypeSymbolTable& symtab = M->getTypeSymbolTable(); + for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end(); + TI != TE; ++TI) { + + // For primitive types and types already defined, just add a name + TypeMap::const_iterator TNI = TypeNames.find(TI->second); + if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || + TNI != TypeNames.end()) { + Out << "mod->addTypeName(\""; + printEscapedString(TI->first); + Out << "\", " << getCppName(TI->second) << ");"; + nl(Out); + // For everything else, define the type + } else { + printType(TI->second); } + } - // Add all the functions to the table - for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); - FI != FE; ++FI) { - printType(FI->getReturnType()); - printType(FI->getFunctionType()); - // Add all the function arguments - for (Function::const_arg_iterator AI = FI->arg_begin(), - AE = FI->arg_end(); AI != AE; ++AI) { - printType(AI->getType()); - } + // Add all of the global variables to the value table... 
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
- // Add all of the basic blocks and instructions
- for (Function::const_iterator BB = FI->begin(),
- E = FI->end(); BB != E; ++BB) {
- printType(BB->getType());
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
- ++I) {
- printType(I->getType());
- for (unsigned i = 0; i < I->getNumOperands(); ++i)
- printType(I->getOperand(i)->getType());
- }
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
}
}
}
+}
- // printConstant - Print out a constant pool entry...
- void CppWriter::printConstant(const Constant *CV) {
- // First, if the constant is actually a GlobalValue (variable or function)
- // or its already in the constant list then we've printed it already and we
- // can just return.
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+ // or it's already in the constant list then we've printed it already and we
+ // can just return.
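
For a concrete flavor of printConstant's output: an i32 constant 5 becomes a line of the following shape in the generated program (const_int32_5 is an illustrative guess at the name getCppName would hand out).

    ConstantInt* const_int32_5 = ConstantInt::get(mod->getContext(),
        APInt(32, StringRef("5"), 10));
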
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end()) + return; - std::string constName(getCppName(CV)); - std::string typeName(getCppName(CV->getType())); + std::string constName(getCppName(CV)); + std::string typeName(getCppName(CV->getType())); - if (isa<GlobalValue>(CV)) { - // Skip variables and functions, we emit them elsewhere - return; - } + if (isa<GlobalValue>(CV)) { + // Skip variables and functions, we emit them elsewhere + return; + } - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - std::string constValue = CI->getValue().toString(10, true); - Out << "ConstantInt* " << constName - << " = ConstantInt::get(mod->getContext(), APInt(" - << cast<IntegerType>(CI->getType())->getBitWidth() - << ", StringRef(\"" << constValue << "\"), 10));"; - } else if (isa<ConstantAggregateZero>(CV)) { - Out << "ConstantAggregateZero* " << constName - << " = ConstantAggregateZero::get(" << typeName << ");"; - } else if (isa<ConstantPointerNull>(CV)) { - Out << "ConstantPointerNull* " << constName - << " = ConstantPointerNull::get(" << typeName << ");"; - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - Out << "ConstantFP* " << constName << " = "; - printCFP(CFP); - Out << ";"; - } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) { - if (CA->isString() && - CA->getType()->getElementType() == - Type::getInt8Ty(CA->getContext())) { - Out << "Constant* " << constName << - " = ConstantArray::get(mod->getContext(), \""; - std::string tmp = CA->getAsString(); - bool nullTerminate = false; - if (tmp[tmp.length()-1] == 0) { - tmp.erase(tmp.length()-1); - nullTerminate = true; - } - printEscapedString(tmp); - // Determine if we want null termination or not. - if (nullTerminate) - Out << "\", true"; // Indicate that the null terminator should be - // added. 
- else - Out << "\", false";// No null terminator - Out << ");"; - } else { - Out << "std::vector<Constant*> " << constName << "_elems;"; - nl(Out); - unsigned N = CA->getNumOperands(); - for (unsigned i = 0; i < N; ++i) { - printConstant(CA->getOperand(i)); // recurse to print operands - Out << constName << "_elems.push_back(" - << getCppName(CA->getOperand(i)) << ");"; - nl(Out); - } - Out << "Constant* " << constName << " = ConstantArray::get(" - << typeName << ", " << constName << "_elems);"; - } - } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) { - Out << "std::vector<Constant*> " << constName << "_fields;"; - nl(Out); - unsigned N = CS->getNumOperands(); - for (unsigned i = 0; i < N; i++) { - printConstant(CS->getOperand(i)); - Out << constName << "_fields.push_back(" - << getCppName(CS->getOperand(i)) << ");"; - nl(Out); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + std::string constValue = CI->getValue().toString(10, true); + Out << "ConstantInt* " << constName + << " = ConstantInt::get(mod->getContext(), APInt(" + << cast<IntegerType>(CI->getType())->getBitWidth() + << ", StringRef(\"" << constValue << "\"), 10));"; + } else if (isa<ConstantAggregateZero>(CV)) { + Out << "ConstantAggregateZero* " << constName + << " = ConstantAggregateZero::get(" << typeName << ");"; + } else if (isa<ConstantPointerNull>(CV)) { + Out << "ConstantPointerNull* " << constName + << " = ConstantPointerNull::get(" << typeName << ");"; + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + Out << "ConstantFP* " << constName << " = "; + printCFP(CFP); + Out << ";"; + } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) { + if (CA->isString() && + CA->getType()->getElementType() == + Type::getInt8Ty(CA->getContext())) { + Out << "Constant* " << constName << + " = ConstantArray::get(mod->getContext(), \""; + std::string tmp = CA->getAsString(); + bool nullTerminate = false; + if (tmp[tmp.length()-1] == 0) { + tmp.erase(tmp.length()-1); + nullTerminate = true; } - Out << "Constant* " << constName << " = ConstantStruct::get(" - << typeName << ", " << constName << "_fields);"; - } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + printEscapedString(tmp); + // Determine if we want null termination or not. + if (nullTerminate) + Out << "\", true"; // Indicate that the null terminator should be + // added. 
+ else + Out << "\", false";// No null terminator + Out << ");"; + } else { Out << "std::vector<Constant*> " << constName << "_elems;"; nl(Out); - unsigned N = CP->getNumOperands(); + unsigned N = CA->getNumOperands(); for (unsigned i = 0; i < N; ++i) { - printConstant(CP->getOperand(i)); + printConstant(CA->getOperand(i)); // recurse to print operands Out << constName << "_elems.push_back(" - << getCppName(CP->getOperand(i)) << ");"; + << getCppName(CA->getOperand(i)) << ");"; nl(Out); } - Out << "Constant* " << constName << " = ConstantVector::get(" + Out << "Constant* " << constName << " = ConstantArray::get(" << typeName << ", " << constName << "_elems);"; - } else if (isa<UndefValue>(CV)) { - Out << "UndefValue* " << constName << " = UndefValue::get(" - << typeName << ");"; - } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { - if (CE->getOpcode() == Instruction::GetElementPtr) { - Out << "std::vector<Constant*> " << constName << "_indices;"; + } + } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) { + Out << "std::vector<Constant*> " << constName << "_fields;"; + nl(Out); + unsigned N = CS->getNumOperands(); + for (unsigned i = 0; i < N; i++) { + printConstant(CS->getOperand(i)); + Out << constName << "_fields.push_back(" + << getCppName(CS->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName << " = ConstantStruct::get(" + << typeName << ", " << constName << "_fields);"; + } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + Out << "std::vector<Constant*> " << constName << "_elems;"; + nl(Out); + unsigned N = CP->getNumOperands(); + for (unsigned i = 0; i < N; ++i) { + printConstant(CP->getOperand(i)); + Out << constName << "_elems.push_back(" + << getCppName(CP->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName << " = ConstantVector::get(" + << typeName << ", " << constName << "_elems);"; + } else if (isa<UndefValue>(CV)) { + Out << "UndefValue* " << constName << " = UndefValue::get(" + << typeName << ");"; + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Out << "std::vector<Constant*> " << constName << "_indices;"; + nl(Out); + printConstant(CE->getOperand(0)); + for (unsigned i = 1; i < CE->getNumOperands(); ++i ) { + printConstant(CE->getOperand(i)); + Out << constName << "_indices.push_back(" + << getCppName(CE->getOperand(i)) << ");"; nl(Out); - printConstant(CE->getOperand(0)); - for (unsigned i = 1; i < CE->getNumOperands(); ++i ) { - printConstant(CE->getOperand(i)); - Out << constName << "_indices.push_back(" - << getCppName(CE->getOperand(i)) << ");"; - nl(Out); - } - Out << "Constant* " << constName - << " = ConstantExpr::getGetElementPtr(" - << getCppName(CE->getOperand(0)) << ", " - << "&" << constName << "_indices[0], " - << constName << "_indices.size()" - << ");"; - } else if (CE->isCast()) { - printConstant(CE->getOperand(0)); - Out << "Constant* " << constName << " = ConstantExpr::getCast("; - switch (CE->getOpcode()) { - default: llvm_unreachable("Invalid cast opcode"); - case Instruction::Trunc: Out << "Instruction::Trunc"; break; - case Instruction::ZExt: Out << "Instruction::ZExt"; break; - case Instruction::SExt: Out << "Instruction::SExt"; break; - case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break; - case Instruction::FPExt: Out << "Instruction::FPExt"; break; - case Instruction::FPToUI: Out << "Instruction::FPToUI"; break; - case Instruction::FPToSI: Out << 
"Instruction::FPToSI"; break; - case Instruction::UIToFP: Out << "Instruction::UIToFP"; break; - case Instruction::SIToFP: Out << "Instruction::SIToFP"; break; - case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break; - case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break; - case Instruction::BitCast: Out << "Instruction::BitCast"; break; - } - Out << ", " << getCppName(CE->getOperand(0)) << ", " - << getCppName(CE->getType()) << ");"; - } else { - unsigned N = CE->getNumOperands(); - for (unsigned i = 0; i < N; ++i ) { - printConstant(CE->getOperand(i)); + } + Out << "Constant* " << constName + << " = ConstantExpr::getGetElementPtr(" + << getCppName(CE->getOperand(0)) << ", " + << "&" << constName << "_indices[0], " + << constName << "_indices.size()" + << ");"; + } else if (CE->isCast()) { + printConstant(CE->getOperand(0)); + Out << "Constant* " << constName << " = ConstantExpr::getCast("; + switch (CE->getOpcode()) { + default: llvm_unreachable("Invalid cast opcode"); + case Instruction::Trunc: Out << "Instruction::Trunc"; break; + case Instruction::ZExt: Out << "Instruction::ZExt"; break; + case Instruction::SExt: Out << "Instruction::SExt"; break; + case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break; + case Instruction::FPExt: Out << "Instruction::FPExt"; break; + case Instruction::FPToUI: Out << "Instruction::FPToUI"; break; + case Instruction::FPToSI: Out << "Instruction::FPToSI"; break; + case Instruction::UIToFP: Out << "Instruction::UIToFP"; break; + case Instruction::SIToFP: Out << "Instruction::SIToFP"; break; + case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break; + case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break; + case Instruction::BitCast: Out << "Instruction::BitCast"; break; + } + Out << ", " << getCppName(CE->getOperand(0)) << ", " + << getCppName(CE->getType()) << ");"; + } else { + unsigned N = CE->getNumOperands(); + for (unsigned i = 0; i < N; ++i ) { + printConstant(CE->getOperand(i)); + } + Out << "Constant* " << constName << " = ConstantExpr::"; + switch (CE->getOpcode()) { + case Instruction::Add: Out << "getAdd("; break; + case Instruction::FAdd: Out << "getFAdd("; break; + case Instruction::Sub: Out << "getSub("; break; + case Instruction::FSub: Out << "getFSub("; break; + case Instruction::Mul: Out << "getMul("; break; + case Instruction::FMul: Out << "getFMul("; break; + case Instruction::UDiv: Out << "getUDiv("; break; + case Instruction::SDiv: Out << "getSDiv("; break; + case Instruction::FDiv: Out << "getFDiv("; break; + case Instruction::URem: Out << "getURem("; break; + case Instruction::SRem: Out << "getSRem("; break; + case Instruction::FRem: Out << "getFRem("; break; + case Instruction::And: Out << "getAnd("; break; + case Instruction::Or: Out << "getOr("; break; + case Instruction::Xor: Out << "getXor("; break; + case Instruction::ICmp: + Out << "getICmp(ICmpInst::ICMP_"; + switch (CE->getPredicate()) { + case ICmpInst::ICMP_EQ: Out << "EQ"; break; + case ICmpInst::ICMP_NE: Out << "NE"; break; + case ICmpInst::ICMP_SLT: Out << "SLT"; break; + case ICmpInst::ICMP_ULT: Out << "ULT"; break; + case ICmpInst::ICMP_SGT: Out << "SGT"; break; + case ICmpInst::ICMP_UGT: Out << "UGT"; break; + case ICmpInst::ICMP_SLE: Out << "SLE"; break; + case ICmpInst::ICMP_ULE: Out << "ULE"; break; + case ICmpInst::ICMP_SGE: Out << "SGE"; break; + case ICmpInst::ICMP_UGE: Out << "UGE"; break; + default: error("Invalid ICmp Predicate"); } - Out << "Constant* " << constName << " = ConstantExpr::"; - switch 
(CE->getOpcode()) { - case Instruction::Add: Out << "getAdd("; break; - case Instruction::FAdd: Out << "getFAdd("; break; - case Instruction::Sub: Out << "getSub("; break; - case Instruction::FSub: Out << "getFSub("; break; - case Instruction::Mul: Out << "getMul("; break; - case Instruction::FMul: Out << "getFMul("; break; - case Instruction::UDiv: Out << "getUDiv("; break; - case Instruction::SDiv: Out << "getSDiv("; break; - case Instruction::FDiv: Out << "getFDiv("; break; - case Instruction::URem: Out << "getURem("; break; - case Instruction::SRem: Out << "getSRem("; break; - case Instruction::FRem: Out << "getFRem("; break; - case Instruction::And: Out << "getAnd("; break; - case Instruction::Or: Out << "getOr("; break; - case Instruction::Xor: Out << "getXor("; break; - case Instruction::ICmp: - Out << "getICmp(ICmpInst::ICMP_"; - switch (CE->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << "EQ"; break; - case ICmpInst::ICMP_NE: Out << "NE"; break; - case ICmpInst::ICMP_SLT: Out << "SLT"; break; - case ICmpInst::ICMP_ULT: Out << "ULT"; break; - case ICmpInst::ICMP_SGT: Out << "SGT"; break; - case ICmpInst::ICMP_UGT: Out << "UGT"; break; - case ICmpInst::ICMP_SLE: Out << "SLE"; break; - case ICmpInst::ICMP_ULE: Out << "ULE"; break; - case ICmpInst::ICMP_SGE: Out << "SGE"; break; - case ICmpInst::ICMP_UGE: Out << "UGE"; break; - default: error("Invalid ICmp Predicate"); - } - break; - case Instruction::FCmp: - Out << "getFCmp(FCmpInst::FCMP_"; - switch (CE->getPredicate()) { - case FCmpInst::FCMP_FALSE: Out << "FALSE"; break; - case FCmpInst::FCMP_ORD: Out << "ORD"; break; - case FCmpInst::FCMP_UNO: Out << "UNO"; break; - case FCmpInst::FCMP_OEQ: Out << "OEQ"; break; - case FCmpInst::FCMP_UEQ: Out << "UEQ"; break; - case FCmpInst::FCMP_ONE: Out << "ONE"; break; - case FCmpInst::FCMP_UNE: Out << "UNE"; break; - case FCmpInst::FCMP_OLT: Out << "OLT"; break; - case FCmpInst::FCMP_ULT: Out << "ULT"; break; - case FCmpInst::FCMP_OGT: Out << "OGT"; break; - case FCmpInst::FCMP_UGT: Out << "UGT"; break; - case FCmpInst::FCMP_OLE: Out << "OLE"; break; - case FCmpInst::FCMP_ULE: Out << "ULE"; break; - case FCmpInst::FCMP_OGE: Out << "OGE"; break; - case FCmpInst::FCMP_UGE: Out << "UGE"; break; - case FCmpInst::FCMP_TRUE: Out << "TRUE"; break; - default: error("Invalid FCmp Predicate"); - } - break; - case Instruction::Shl: Out << "getShl("; break; - case Instruction::LShr: Out << "getLShr("; break; - case Instruction::AShr: Out << "getAShr("; break; - case Instruction::Select: Out << "getSelect("; break; - case Instruction::ExtractElement: Out << "getExtractElement("; break; - case Instruction::InsertElement: Out << "getInsertElement("; break; - case Instruction::ShuffleVector: Out << "getShuffleVector("; break; - default: - error("Invalid constant expression"); - break; + break; + case Instruction::FCmp: + Out << "getFCmp(FCmpInst::FCMP_"; + switch (CE->getPredicate()) { + case FCmpInst::FCMP_FALSE: Out << "FALSE"; break; + case FCmpInst::FCMP_ORD: Out << "ORD"; break; + case FCmpInst::FCMP_UNO: Out << "UNO"; break; + case FCmpInst::FCMP_OEQ: Out << "OEQ"; break; + case FCmpInst::FCMP_UEQ: Out << "UEQ"; break; + case FCmpInst::FCMP_ONE: Out << "ONE"; break; + case FCmpInst::FCMP_UNE: Out << "UNE"; break; + case FCmpInst::FCMP_OLT: Out << "OLT"; break; + case FCmpInst::FCMP_ULT: Out << "ULT"; break; + case FCmpInst::FCMP_OGT: Out << "OGT"; break; + case FCmpInst::FCMP_UGT: Out << "UGT"; break; + case FCmpInst::FCMP_OLE: Out << "OLE"; break; + case FCmpInst::FCMP_ULE: Out << "ULE"; break; + 
case FCmpInst::FCMP_OGE: Out << "OGE"; break; + case FCmpInst::FCMP_UGE: Out << "UGE"; break; + case FCmpInst::FCMP_TRUE: Out << "TRUE"; break; + default: error("Invalid FCmp Predicate"); } - Out << getCppName(CE->getOperand(0)); - for (unsigned i = 1; i < CE->getNumOperands(); ++i) - Out << ", " << getCppName(CE->getOperand(i)); - Out << ");"; + break; + case Instruction::Shl: Out << "getShl("; break; + case Instruction::LShr: Out << "getLShr("; break; + case Instruction::AShr: Out << "getAShr("; break; + case Instruction::Select: Out << "getSelect("; break; + case Instruction::ExtractElement: Out << "getExtractElement("; break; + case Instruction::InsertElement: Out << "getInsertElement("; break; + case Instruction::ShuffleVector: Out << "getShuffleVector("; break; + default: + error("Invalid constant expression"); + break; } - } else { - error("Bad Constant"); - Out << "Constant* " << constName << " = 0; "; + Out << getCppName(CE->getOperand(0)); + for (unsigned i = 1; i < CE->getNumOperands(); ++i) + Out << ", " << getCppName(CE->getOperand(i)); + Out << ");"; } - nl(Out); + } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) { + Out << "Constant* " << constName << " = "; + Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");"; + } else { + error("Bad Constant"); + Out << "Constant* " << constName << " = 0; "; } + nl(Out); +} - void CppWriter::printConstants(const Module* M) { - // Traverse all the global variables looking for constant initializers - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) - if (I->hasInitializer()) - printConstant(I->getInitializer()); - - // Traverse the LLVM functions looking for constants - for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); - FI != FE; ++FI) { - // Add all of the basic blocks and instructions - for (Function::const_iterator BB = FI->begin(), - E = FI->end(); BB != E; ++BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; - ++I) { - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) { - printConstant(C); - } +void CppWriter::printConstants(const Module* M) { + // Traverse all the global variables looking for constant initializers + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) + if (I->hasInitializer()) + printConstant(I->getInitializer()); + + // Traverse the LLVM functions looking for constants + for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + // Add all of the basic blocks and instructions + for (Function::const_iterator BB = FI->begin(), + E = FI->end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; + ++I) { + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) { + printConstant(C); } } } } } +} - void CppWriter::printVariableUses(const GlobalVariable *GV) { - nl(Out) << "// Type Definitions"; - nl(Out); - printType(GV->getType()); - if (GV->hasInitializer()) { - Constant *Init = GV->getInitializer(); - printType(Init->getType()); - if (Function *F = dyn_cast<Function>(Init)) { - nl(Out)<< "/ Function Declarations"; nl(Out); - printFunctionHead(F); - } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { - nl(Out) << "// Global Variable Declarations"; nl(Out); - printVariableHead(gv); - - nl(Out) << "// Global Variable 
Definitions"; nl(Out); - printVariableBody(gv); - } else { - nl(Out) << "// Constant Definitions"; nl(Out); - printConstant(Init); - } +void CppWriter::printVariableUses(const GlobalVariable *GV) { + nl(Out) << "// Type Definitions"; + nl(Out); + printType(GV->getType()); + if (GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + printType(Init->getType()); + if (Function *F = dyn_cast<Function>(Init)) { + nl(Out)<< "/ Function Declarations"; nl(Out); + printFunctionHead(F); + } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { + nl(Out) << "// Global Variable Declarations"; nl(Out); + printVariableHead(gv); + + nl(Out) << "// Global Variable Definitions"; nl(Out); + printVariableBody(gv); + } else { + nl(Out) << "// Constant Definitions"; nl(Out); + printConstant(Init); } } +} - void CppWriter::printVariableHead(const GlobalVariable *GV) { - nl(Out) << "GlobalVariable* " << getCppName(GV); - if (is_inline) { - Out << " = mod->getGlobalVariable(mod->getContext(), "; - printEscapedString(GV->getName()); - Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; - nl(Out) << "if (!" << getCppName(GV) << ") {"; - in(); nl(Out) << getCppName(GV); - } - Out << " = new GlobalVariable(/*Module=*/*mod, "; - nl(Out) << "/*Type=*/"; - printCppName(GV->getType()->getElementType()); - Out << ","; - nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false"); - Out << ","; - nl(Out) << "/*Linkage=*/"; - printLinkageType(GV->getLinkage()); - Out << ","; - nl(Out) << "/*Initializer=*/0, "; - if (GV->hasInitializer()) { - Out << "// has initializer, specified below"; - } - nl(Out) << "/*Name=*/\""; +void CppWriter::printVariableHead(const GlobalVariable *GV) { + nl(Out) << "GlobalVariable* " << getCppName(GV); + if (is_inline) { + Out << " = mod->getGlobalVariable(mod->getContext(), "; printEscapedString(GV->getName()); + Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; + nl(Out) << "if (!" 
<< getCppName(GV) << ") {"; + in(); nl(Out) << getCppName(GV); + } + Out << " = new GlobalVariable(/*Module=*/*mod, "; + nl(Out) << "/*Type=*/"; + printCppName(GV->getType()->getElementType()); + Out << ","; + nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false"); + Out << ","; + nl(Out) << "/*Linkage=*/"; + printLinkageType(GV->getLinkage()); + Out << ","; + nl(Out) << "/*Initializer=*/0, "; + if (GV->hasInitializer()) { + Out << "// has initializer, specified below"; + } + nl(Out) << "/*Name=*/\""; + printEscapedString(GV->getName()); + Out << "\");"; + nl(Out); + + if (GV->hasSection()) { + printCppName(GV); + Out << "->setSection(\""; + printEscapedString(GV->getSection()); Out << "\");"; nl(Out); - - if (GV->hasSection()) { - printCppName(GV); - Out << "->setSection(\""; - printEscapedString(GV->getSection()); - Out << "\");"; - nl(Out); - } - if (GV->getAlignment()) { - printCppName(GV); - Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; - nl(Out); - } - if (GV->getVisibility() != GlobalValue::DefaultVisibility) { - printCppName(GV); - Out << "->setVisibility("; - printVisibilityType(GV->getVisibility()); - Out << ");"; - nl(Out); - } - if (GV->isThreadLocal()) { - printCppName(GV); - Out << "->setThreadLocal(true);"; - nl(Out); - } - if (is_inline) { - out(); Out << "}"; nl(Out); - } } - - void CppWriter::printVariableBody(const GlobalVariable *GV) { - if (GV->hasInitializer()) { - printCppName(GV); - Out << "->setInitializer("; - Out << getCppName(GV->getInitializer()) << ");"; - nl(Out); - } + if (GV->getAlignment()) { + printCppName(GV); + Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; + nl(Out); } + if (GV->getVisibility() != GlobalValue::DefaultVisibility) { + printCppName(GV); + Out << "->setVisibility("; + printVisibilityType(GV->getVisibility()); + Out << ");"; + nl(Out); + } + if (GV->isThreadLocal()) { + printCppName(GV); + Out << "->setThreadLocal(true);"; + nl(Out); + } + if (is_inline) { + out(); Out << "}"; nl(Out); + } +} - std::string CppWriter::getOpName(Value* V) { - if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end()) - return getCppName(V); - - // See if its alread in the map of forward references, if so just return the - // name we already set up for it - ForwardRefMap::const_iterator I = ForwardRefs.find(V); - if (I != ForwardRefs.end()) - return I->second; - - // This is a new forward reference. Generate a unique name for it - std::string result(std::string("fwdref_") + utostr(uniqueNum++)); - - // Yes, this is a hack. An Argument is the smallest instantiable value that - // we can make as a placeholder for the real value. We'll replace these - // Argument instances later. - Out << "Argument* " << result << " = new Argument(" - << getCppName(V->getType()) << ");"; +void CppWriter::printVariableBody(const GlobalVariable *GV) { + if (GV->hasInitializer()) { + printCppName(GV); + Out << "->setInitializer("; + Out << getCppName(GV->getInitializer()) << ");"; nl(Out); - ForwardRefs[V] = result; - return result; } +} - // printInstruction - This member is called for each Instruction in a function. - void CppWriter::printInstruction(const Instruction *I, - const std::string& bbname) { - std::string iName(getCppName(I)); +std::string CppWriter::getOpName(Value* V) { + if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end()) + return getCppName(V); - // Before we emit this instruction, we need to take care of generating any - // forward references. 
So, we get the names of all the operands in advance
- const unsigned Ops(I->getNumOperands());
- std::string* opNames = new std::string[Ops];
- for (unsigned i = 0; i < Ops; i++) {
- opNames[i] = getOpName(I->getOperand(i));
- }
+ // See if it's already in the map of forward references, if so just return the
+ // name we already set up for it
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
- switch (I->getOpcode()) {
- default:
- error("Invalid instruction");
- break;
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
- case Instruction::Ret: {
- const ReturnInst* ret = cast<ReturnInst>(I);
- Out << "ReturnInst::Create(mod->getContext(), "
- << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
- break;
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+}
+
+// printInstruction - This member is called for each Instruction in a function.
+void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++)
+ opNames[i] = getOpName(I->getOperand(i));
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create(mod->getContext(), "
+ << (ret->getReturnValue() ?
opNames[0] + ", " : "") << bbname << ");"; + break; + } + case Instruction::Br: { + const BranchInst* br = cast<BranchInst>(I); + Out << "BranchInst::Create(" ; + if (br->getNumOperands() == 3) { + Out << opNames[2] << ", " + << opNames[1] << ", " + << opNames[0] << ", "; + + } else if (br->getNumOperands() == 1) { + Out << opNames[0] << ", "; + } else { + error("Branch with 2 operands?"); } - case Instruction::Br: { - const BranchInst* br = cast<BranchInst>(I); - Out << "BranchInst::Create(" ; - if (br->getNumOperands() == 3 ) { - Out << opNames[2] << ", " - << opNames[1] << ", " - << opNames[0] << ", "; - - } else if (br->getNumOperands() == 1) { - Out << opNames[0] << ", "; - } else { - error("Branch with 2 operands?"); - } - Out << bbname << ");"; - break; + Out << bbname << ");"; + break; + } + case Instruction::Switch: { + const SwitchInst *SI = cast<SwitchInst>(I); + Out << "SwitchInst* " << iName << " = SwitchInst::Create(" + << opNames[0] << ", " + << opNames[1] << ", " + << SI->getNumCases() << ", " << bbname << ");"; + nl(Out); + for (unsigned i = 2; i != SI->getNumOperands(); i += 2) { + Out << iName << "->addCase(" + << opNames[i] << ", " + << opNames[i+1] << ");"; + nl(Out); } - case Instruction::Switch: { - const SwitchInst *SI = cast<SwitchInst>(I); - Out << "SwitchInst* " << iName << " = SwitchInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " - << SI->getNumCases() << ", " << bbname << ");"; + break; + } + case Instruction::IndirectBr: { + const IndirectBrInst *IBI = cast<IndirectBrInst>(I); + Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create(" + << opNames[0] << ", " << IBI->getNumDestinations() << ");"; + nl(Out); + for (unsigned i = 1; i != IBI->getNumOperands(); ++i) { + Out << iName << "->addDestination(" << opNames[i] << ");"; nl(Out); - for (unsigned i = 2; i != SI->getNumOperands(); i += 2) { - Out << iName << "->addCase(" - << opNames[i] << ", " - << opNames[i+1] << ");"; - nl(Out); - } - break; } - case Instruction::IndirectBr: { - const IndirectBrInst *IBI = cast<IndirectBrInst>(I); - Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create(" - << opNames[0] << ", " << IBI->getNumDestinations() << ");"; + break; + } + case Instruction::Invoke: { + const InvokeInst* inv = cast<InvokeInst>(I); + Out << "std::vector<Value*> " << iName << "_params;"; + nl(Out); + for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) { + Out << iName << "_params.push_back(" + << getOpName(inv->getArgOperand(i)) << ");"; nl(Out); - for (unsigned i = 1; i != IBI->getNumOperands(); ++i) { - Out << iName << "->addDestination(" << opNames[i] << ");"; - nl(Out); - } - break; } - case Instruction::Invoke: { - const InvokeInst* inv = cast<InvokeInst>(I); - Out << "std::vector<Value*> " << iName << "_params;"; + // FIXME: This shouldn't use magic numbers -3, -2, and -1. 
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create(" + << getOpName(inv->getCalledFunction()) << ", " + << getOpName(inv->getNormalDest()) << ", " + << getOpName(inv->getUnwindDest()) << ", " + << iName << "_params.begin(), " + << iName << "_params.end(), \""; + printEscapedString(inv->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(inv->getCallingConv()); + Out << ");"; + printAttributes(inv->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Unwind: { + Out << "new UnwindInst(" + << bbname << ");"; + break; + } + case Instruction::Unreachable: { + Out << "new UnreachableInst(" + << "mod->getContext(), " + << bbname << ");"; + break; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr:{ + Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; + switch (I->getOpcode()) { + case Instruction::Add: Out << "Instruction::Add"; break; + case Instruction::FAdd: Out << "Instruction::FAdd"; break; + case Instruction::Sub: Out << "Instruction::Sub"; break; + case Instruction::FSub: Out << "Instruction::FSub"; break; + case Instruction::Mul: Out << "Instruction::Mul"; break; + case Instruction::FMul: Out << "Instruction::FMul"; break; + case Instruction::UDiv:Out << "Instruction::UDiv"; break; + case Instruction::SDiv:Out << "Instruction::SDiv"; break; + case Instruction::FDiv:Out << "Instruction::FDiv"; break; + case Instruction::URem:Out << "Instruction::URem"; break; + case Instruction::SRem:Out << "Instruction::SRem"; break; + case Instruction::FRem:Out << "Instruction::FRem"; break; + case Instruction::And: Out << "Instruction::And"; break; + case Instruction::Or: Out << "Instruction::Or"; break; + case Instruction::Xor: Out << "Instruction::Xor"; break; + case Instruction::Shl: Out << "Instruction::Shl"; break; + case Instruction::LShr:Out << "Instruction::LShr"; break; + case Instruction::AShr:Out << "Instruction::AShr"; break; + default: Out << "Instruction::BadOpCode"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::FCmp: { + Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", "; + switch (cast<FCmpInst>(I)->getPredicate()) { + case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; + case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; + case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break; + case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break; + case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break; + case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break; + case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break; + case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break; + case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break; + case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break; + case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break; + case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break; + case 
FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break; + case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break; + case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break; + case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break; + default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\");"; + break; + } + case Instruction::ICmp: { + Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", "; + switch (cast<ICmpInst>(I)->getPredicate()) { + case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; + case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; + case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break; + case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break; + case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break; + case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break; + case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break; + case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break; + case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break; + case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break; + default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\");"; + break; + } + case Instruction::Alloca: { + const AllocaInst* allocaI = cast<AllocaInst>(I); + Out << "AllocaInst* " << iName << " = new AllocaInst(" + << getCppName(allocaI->getAllocatedType()) << ", "; + if (allocaI->isArrayAllocation()) + Out << opNames[0] << ", "; + Out << "\""; + printEscapedString(allocaI->getName()); + Out << "\", " << bbname << ");"; + if (allocaI->getAlignment()) + nl(Out) << iName << "->setAlignment(" + << allocaI->getAlignment() << ");"; + break; + } + case Instruction::Load: { + const LoadInst* load = cast<LoadInst>(I); + Out << "LoadInst* " << iName << " = new LoadInst(" + << opNames[0] << ", \""; + printEscapedString(load->getName()); + Out << "\", " << (load->isVolatile() ? "true" : "false" ) + << ", " << bbname << ");"; + break; + } + case Instruction::Store: { + const StoreInst* store = cast<StoreInst>(I); + Out << " new StoreInst(" + << opNames[0] << ", " + << opNames[1] << ", " + << (store->isVolatile() ? 
"true" : "false") + << ", " << bbname << ");"; + break; + } + case Instruction::GetElementPtr: { + const GetElementPtrInst* gep = cast<GetElementPtrInst>(I); + if (gep->getNumOperands() <= 2) { + Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" + << opNames[0]; + if (gep->getNumOperands() == 2) + Out << ", " << opNames[1]; + } else { + Out << "std::vector<Value*> " << iName << "_indices;"; nl(Out); - for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) { - Out << iName << "_params.push_back(" + for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { + Out << iName << "_indices.push_back(" << opNames[i] << ");"; nl(Out); } - Out << "InvokeInst *" << iName << " = InvokeInst::Create(" - << opNames[Ops - 3] << ", " - << opNames[Ops - 2] << ", " - << opNames[Ops - 1] << ", " - << iName << "_params.begin(), " << iName << "_params.end(), \""; - printEscapedString(inv->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->setCallingConv("; - printCallingConv(inv->getCallingConv()); - Out << ");"; - printAttributes(inv->getAttributes(), iName); - Out << iName << "->setAttributes(" << iName << "_PAL);"; + Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" + << opNames[0] << ", " << iName << "_indices.begin(), " + << iName << "_indices.end()"; + } + Out << ", \""; + printEscapedString(gep->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::PHI: { + const PHINode* phi = cast<PHINode>(I); + + Out << "PHINode* " << iName << " = PHINode::Create(" + << getCppName(phi->getType()) << ", \""; + printEscapedString(phi->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->reserveOperandSpace(" + << phi->getNumIncomingValues() + << ");"; + nl(Out); + for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { + Out << iName << "->addIncoming(" + << opNames[i] << ", " << opNames[i+1] << ");"; nl(Out); - break; - } - case Instruction::Unwind: { - Out << "new UnwindInst(" - << bbname << ");"; - break; - } - case Instruction::Unreachable: { - Out << "new UnreachableInst(" - << "mod->getContext(), " - << bbname << ");"; - break; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr:{ - Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; - switch (I->getOpcode()) { - case Instruction::Add: Out << "Instruction::Add"; break; - case Instruction::FAdd: Out << "Instruction::FAdd"; break; - case Instruction::Sub: Out << "Instruction::Sub"; break; - case Instruction::FSub: Out << "Instruction::FSub"; break; - case Instruction::Mul: Out << "Instruction::Mul"; break; - case Instruction::FMul: Out << "Instruction::FMul"; break; - case Instruction::UDiv:Out << "Instruction::UDiv"; break; - case Instruction::SDiv:Out << "Instruction::SDiv"; break; - case Instruction::FDiv:Out << "Instruction::FDiv"; break; - case Instruction::URem:Out << "Instruction::URem"; break; - case Instruction::SRem:Out << "Instruction::SRem"; break; - case Instruction::FRem:Out << "Instruction::FRem"; break; - case Instruction::And: Out << "Instruction::And"; break; - case Instruction::Or: Out << "Instruction::Or"; break; - case 
Instruction::Xor: Out << "Instruction::Xor"; break; - case Instruction::Shl: Out << "Instruction::Shl"; break; - case Instruction::LShr:Out << "Instruction::LShr"; break; - case Instruction::AShr:Out << "Instruction::AShr"; break; - default: Out << "Instruction::BadOpCode"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::FCmp: { - Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", "; - switch (cast<FCmpInst>(I)->getPredicate()) { - case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; - case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; - case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break; - case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break; - case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break; - case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break; - case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break; - case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break; - case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break; - case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break; - case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break; - case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break; - case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break; - case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break; - case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break; - case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break; - default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\");"; - break; - } - case Instruction::ICmp: { - Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", "; - switch (cast<ICmpInst>(I)->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; - case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; - case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break; - case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break; - case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break; - case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break; - case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break; - case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break; - case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break; - case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break; - default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\");"; - break; - } - case Instruction::Alloca: { - const AllocaInst* allocaI = cast<AllocaInst>(I); - Out << "AllocaInst* " << iName << " = new AllocaInst(" - << getCppName(allocaI->getAllocatedType()) << ", "; - if (allocaI->isArrayAllocation()) - Out << opNames[0] << ", "; - Out << "\""; - printEscapedString(allocaI->getName()); - Out << "\", " << bbname << ");"; - if (allocaI->getAlignment()) - nl(Out) << iName << "->setAlignment(" - << allocaI->getAlignment() << ");"; - break; - } - case Instruction::Load:{ - const LoadInst* load = cast<LoadInst>(I); - Out << "LoadInst* " << iName << " = new LoadInst(" - << opNames[0] << ", \""; - printEscapedString(load->getName()); - Out << "\", " << (load->isVolatile() ? 
"true" : "false" ) - << ", " << bbname << ");"; - break; - } - case Instruction::Store: { - const StoreInst* store = cast<StoreInst>(I); - Out << " new StoreInst(" - << opNames[0] << ", " - << opNames[1] << ", " - << (store->isVolatile() ? "true" : "false") - << ", " << bbname << ");"; - break; - } - case Instruction::GetElementPtr: { - const GetElementPtrInst* gep = cast<GetElementPtrInst>(I); - if (gep->getNumOperands() <= 2) { - Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" - << opNames[0]; - if (gep->getNumOperands() == 2) - Out << ", " << opNames[1]; - } else { - Out << "std::vector<Value*> " << iName << "_indices;"; - nl(Out); - for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { - Out << iName << "_indices.push_back(" - << opNames[i] << ");"; - nl(Out); - } - Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" - << opNames[0] << ", " << iName << "_indices.begin(), " - << iName << "_indices.end()"; - } - Out << ", \""; - printEscapedString(gep->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::PHI: { - const PHINode* phi = cast<PHINode>(I); - - Out << "PHINode* " << iName << " = PHINode::Create(" - << getCppName(phi->getType()) << ", \""; - printEscapedString(phi->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->reserveOperandSpace(" - << phi->getNumIncomingValues() - << ");"; + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: { + const CastInst* cst = cast<CastInst>(I); + Out << "CastInst* " << iName << " = new "; + switch (I->getOpcode()) { + case Instruction::Trunc: Out << "TruncInst"; break; + case Instruction::ZExt: Out << "ZExtInst"; break; + case Instruction::SExt: Out << "SExtInst"; break; + case Instruction::FPTrunc: Out << "FPTruncInst"; break; + case Instruction::FPExt: Out << "FPExtInst"; break; + case Instruction::FPToUI: Out << "FPToUIInst"; break; + case Instruction::FPToSI: Out << "FPToSIInst"; break; + case Instruction::UIToFP: Out << "UIToFPInst"; break; + case Instruction::SIToFP: Out << "SIToFPInst"; break; + case Instruction::PtrToInt: Out << "PtrToIntInst"; break; + case Instruction::IntToPtr: Out << "IntToPtrInst"; break; + case Instruction::BitCast: Out << "BitCastInst"; break; + default: assert(!"Unreachable"); break; + } + Out << "(" << opNames[0] << ", " + << getCppName(cst->getType()) << ", \""; + printEscapedString(cst->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::Call: { + const CallInst* call = cast<CallInst>(I); + if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) { + Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" + << getCppName(ila->getFunctionType()) << ", \"" + << ila->getAsmString() << "\", \"" + << ila->getConstraintString() << "\"," + << (ila->hasSideEffects() ? 
"true" : "false") << ");"; nl(Out); - for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { - Out << iName << "->addIncoming(" - << opNames[i] << ", " << opNames[i+1] << ");"; - nl(Out); - } - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: { - const CastInst* cst = cast<CastInst>(I); - Out << "CastInst* " << iName << " = new "; - switch (I->getOpcode()) { - case Instruction::Trunc: Out << "TruncInst"; break; - case Instruction::ZExt: Out << "ZExtInst"; break; - case Instruction::SExt: Out << "SExtInst"; break; - case Instruction::FPTrunc: Out << "FPTruncInst"; break; - case Instruction::FPExt: Out << "FPExtInst"; break; - case Instruction::FPToUI: Out << "FPToUIInst"; break; - case Instruction::FPToSI: Out << "FPToSIInst"; break; - case Instruction::UIToFP: Out << "UIToFPInst"; break; - case Instruction::SIToFP: Out << "SIToFPInst"; break; - case Instruction::PtrToInt: Out << "PtrToIntInst"; break; - case Instruction::IntToPtr: Out << "IntToPtrInst"; break; - case Instruction::BitCast: Out << "BitCastInst"; break; - default: assert(!"Unreachable"); break; - } - Out << "(" << opNames[0] << ", " - << getCppName(cst->getType()) << ", \""; - printEscapedString(cst->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::Call:{ - const CallInst* call = cast<CallInst>(I); - if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) { - Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" - << getCppName(ila->getFunctionType()) << ", \"" - << ila->getAsmString() << "\", \"" - << ila->getConstraintString() << "\"," - << (ila->hasSideEffects() ? "true" : "false") << ");"; - nl(Out); - } - if (call->getNumOperands() > 2) { - Out << "std::vector<Value*> " << iName << "_params;"; + if (call->getNumArgOperands() > 1) { + Out << "std::vector<Value*> " << iName << "_params;"; + nl(Out); + for (unsigned i = 0; i < call->getNumArgOperands(); ++i) { + Out << iName << "_params.push_back(" << opNames[i] << ");"; nl(Out); - for (unsigned i = 1; i < call->getNumOperands(); ++i) { - Out << iName << "_params.push_back(" << opNames[i] << ");"; - nl(Out); - } - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[0] << ", " << iName << "_params.begin(), " - << iName << "_params.end(), \""; - } else if (call->getNumOperands() == 2) { - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[0] << ", " << opNames[1] << ", \""; - } else { - Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0] - << ", \""; } - printEscapedString(call->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->setCallingConv("; - printCallingConv(call->getCallingConv()); - Out << ");"; - nl(Out) << iName << "->setTailCall(" - << (call->isTailCall() ? 
"true":"false"); - Out << ");"; - printAttributes(call->getAttributes(), iName); - Out << iName << "->setAttributes(" << iName << "_PAL);"; - nl(Out); - break; - } - case Instruction::Select: { - const SelectInst* sel = cast<SelectInst>(I); - Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create("; - Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(sel->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::UserOp1: - /// FALL THROUGH - case Instruction::UserOp2: { - /// FIXME: What should be done here? - break; - } - case Instruction::VAArg: { - const VAArgInst* va = cast<VAArgInst>(I); - Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst(" - << opNames[0] << ", " << getCppName(va->getType()) << ", \""; - printEscapedString(va->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::ExtractElement: { - const ExtractElementInst* eei = cast<ExtractElementInst>(I); - Out << "ExtractElementInst* " << getCppName(eei) - << " = new ExtractElementInst(" << opNames[0] - << ", " << opNames[1] << ", \""; - printEscapedString(eei->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::InsertElement: { - const InsertElementInst* iei = cast<InsertElementInst>(I); - Out << "InsertElementInst* " << getCppName(iei) - << " = InsertElementInst::Create(" << opNames[0] - << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(iei->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::ShuffleVector: { - const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I); - Out << "ShuffleVectorInst* " << getCppName(svi) - << " = new ShuffleVectorInst(" << opNames[0] - << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(svi->getName()); - Out << "\", " << bbname << ");"; - break; + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), " + << iName << "_params.end(), \""; + } else if (call->getNumArgOperands() == 1) { + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \""; + } else { + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", \""; } - case Instruction::ExtractValue: { - const ExtractValueInst *evi = cast<ExtractValueInst>(I); - Out << "std::vector<unsigned> " << iName << "_indices;"; + printEscapedString(call->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(call->getCallingConv()); + Out << ");"; + nl(Out) << iName << "->setTailCall(" + << (call->isTailCall() ? "true" : "false"); + Out << ");"; + nl(Out); + printAttributes(call->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Select: { + const SelectInst* sel = cast<SelectInst>(I); + Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create("; + Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(sel->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::UserOp1: + /// FALL THROUGH + case Instruction::UserOp2: { + /// FIXME: What should be done here? 
+ break; + } + case Instruction::VAArg: { + const VAArgInst* va = cast<VAArgInst>(I); + Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst(" + << opNames[0] << ", " << getCppName(va->getType()) << ", \""; + printEscapedString(va->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ExtractElement: { + const ExtractElementInst* eei = cast<ExtractElementInst>(I); + Out << "ExtractElementInst* " << getCppName(eei) + << " = new ExtractElementInst(" << opNames[0] + << ", " << opNames[1] << ", \""; + printEscapedString(eei->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::InsertElement: { + const InsertElementInst* iei = cast<InsertElementInst>(I); + Out << "InsertElementInst* " << getCppName(iei) + << " = InsertElementInst::Create(" << opNames[0] + << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(iei->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I); + Out << "ShuffleVectorInst* " << getCppName(svi) + << " = new ShuffleVectorInst(" << opNames[0] + << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(svi->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ExtractValue: { + const ExtractValueInst *evi = cast<ExtractValueInst>(I); + Out << "std::vector<unsigned> " << iName << "_indices;"; + nl(Out); + for (unsigned i = 0; i < evi->getNumIndices(); ++i) { + Out << iName << "_indices.push_back(" + << evi->idx_begin()[i] << ");"; nl(Out); - for (unsigned i = 0; i < evi->getNumIndices(); ++i) { - Out << iName << "_indices.push_back(" - << evi->idx_begin()[i] << ");"; - nl(Out); - } - Out << "ExtractValueInst* " << getCppName(evi) - << " = ExtractValueInst::Create(" << opNames[0] - << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; - printEscapedString(evi->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::InsertValue: { - const InsertValueInst *ivi = cast<InsertValueInst>(I); - Out << "std::vector<unsigned> " << iName << "_indices;"; + Out << "ExtractValueInst* " << getCppName(evi) + << " = ExtractValueInst::Create(" << opNames[0] + << ", " + << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + printEscapedString(evi->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::InsertValue: { + const InsertValueInst *ivi = cast<InsertValueInst>(I); + Out << "std::vector<unsigned> " << iName << "_indices;"; + nl(Out); + for (unsigned i = 0; i < ivi->getNumIndices(); ++i) { + Out << iName << "_indices.push_back(" + << ivi->idx_begin()[i] << ");"; nl(Out); - for (unsigned i = 0; i < ivi->getNumIndices(); ++i) { - Out << iName << "_indices.push_back(" - << ivi->idx_begin()[i] << ");"; - nl(Out); - } - Out << "InsertValueInst* " << getCppName(ivi) - << " = InsertValueInst::Create(" << opNames[0] - << ", " << opNames[1] << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; - printEscapedString(ivi->getName()); - Out << "\", " << bbname << ");"; - break; } + Out << "InsertValueInst* " << getCppName(ivi) + << " = InsertValueInst::Create(" << opNames[0] + << ", " << opNames[1] << ", " + << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + printEscapedString(ivi->getName()); + Out << "\", " << bbname << ");"; + break; + } } DefinedValues.insert(I); nl(Out); delete [] opNames; } - // Print out the types, constants and declarations needed 
by one function - void CppWriter::printFunctionUses(const Function* F) { - nl(Out) << "// Type Definitions"; nl(Out); - if (!is_inline) { - // Print the function's return type - printType(F->getReturnType()); +// Print out the types, constants and declarations needed by one function +void CppWriter::printFunctionUses(const Function* F) { + nl(Out) << "// Type Definitions"; nl(Out); + if (!is_inline) { + // Print the function's return type + printType(F->getReturnType()); - // Print the function's function type - printType(F->getFunctionType()); + // Print the function's function type + printType(F->getFunctionType()); - // Print the types of each of the function's arguments - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - printType(AI->getType()); - } + // Print the types of each of the function's arguments + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + printType(AI->getType()); } + } - // Print type definitions for every type referenced by an instruction and - // make a note of any global values or constants that are referenced - SmallPtrSet<GlobalValue*,64> gvs; - SmallPtrSet<Constant*,64> consts; - for (Function::const_iterator BB = F->begin(), BE = F->end(); - BB != BE; ++BB){ - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - // Print the type of the instruction itself - printType(I->getType()); + // Print type definitions for every type referenced by an instruction and + // make a note of any global values or constants that are referenced + SmallPtrSet<GlobalValue*,64> gvs; + SmallPtrSet<Constant*,64> consts; + for (Function::const_iterator BB = F->begin(), BE = F->end(); + BB != BE; ++BB){ + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + // Print the type of the instruction itself + printType(I->getType()); - // Print the type of each of the instruction's operands - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value* operand = I->getOperand(i); - printType(operand->getType()); - - // If the operand references a GVal or Constant, make a note of it - if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) { - gvs.insert(GV); - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->hasInitializer()) - consts.insert(GVar->getInitializer()); - } else if (Constant* C = dyn_cast<Constant>(operand)) - consts.insert(C); - } + // Print the type of each of the instruction's operands + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + Value* operand = I->getOperand(i); + printType(operand->getType()); + + // If the operand references a GVal or Constant, make a note of it + if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) { + gvs.insert(GV); + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (GVar->hasInitializer()) + consts.insert(GVar->getInitializer()); + } else if (Constant* C = dyn_cast<Constant>(operand)) + consts.insert(C); } } + } - // Print the function declarations for any functions encountered - nl(Out) << "// Function Declarations"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (Function* Fun = dyn_cast<Function>(*I)) { - if (!is_inline || Fun != F) - printFunctionHead(Fun); - } + // Print the function declarations for any functions encountered + nl(Out) << "// Function Declarations"; nl(Out); + for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (Function* Fun = 
dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
}
+ }

- // Print the global variable declarations for any variables encountered
- nl(Out) << "// Global Variable Declarations"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
- printVariableHead(F);
- }
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }

- // Print the constants found
- nl(Out) << "// Constant Definitions"; nl(Out);
- for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
- E = consts.end(); I != E; ++I) {
- printConstant(*I);
- }
+ // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }

- // Process the global variables definitions now that all the constants have
- // been emitted. These definitions just couple the gvars with their constant
- // initializers.
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
- printVariableBody(GV);
- }
+ // Process the global variables definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
}
+}

- void CppWriter::printFunctionHead(const Function* F) {
- nl(Out) << "Function* " << getCppName(F);
- if (is_inline) {
- Out << " = mod->getFunction(\"";
- printEscapedString(F->getName());
- Out << "\", " << getCppName(F->getFunctionType()) << ");";
- nl(Out) << "if (!" << getCppName(F) << ") {";
- nl(Out) << getCppName(F);
- }
- Out<< " = Function::Create(";
- nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
- nl(Out) << "/*Linkage=*/";
- printLinkageType(F->getLinkage());
- Out << ",";
- nl(Out) << "/*Name=*/\"";
+void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ if (is_inline) {
+ Out << " = mod->getFunction(\"";
printEscapedString(F->getName());
- Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
- nl(Out,-1);
+ Out << "\", " << getCppName(F->getFunctionType()) << ");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+ }
+ Out << " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? 
"// (external, no body)" : ""); + nl(Out,-1); + printCppName(F); + Out << "->setCallingConv("; + printCallingConv(F->getCallingConv()); + Out << ");"; + nl(Out); + if (F->hasSection()) { + printCppName(F); + Out << "->setSection(\"" << F->getSection() << "\");"; + nl(Out); + } + if (F->getAlignment()) { + printCppName(F); + Out << "->setAlignment(" << F->getAlignment() << ");"; + nl(Out); + } + if (F->getVisibility() != GlobalValue::DefaultVisibility) { printCppName(F); - Out << "->setCallingConv("; - printCallingConv(F->getCallingConv()); + Out << "->setVisibility("; + printVisibilityType(F->getVisibility()); Out << ");"; nl(Out); - if (F->hasSection()) { - printCppName(F); - Out << "->setSection(\"" << F->getSection() << "\");"; - nl(Out); - } - if (F->getAlignment()) { - printCppName(F); - Out << "->setAlignment(" << F->getAlignment() << ");"; - nl(Out); - } - if (F->getVisibility() != GlobalValue::DefaultVisibility) { - printCppName(F); - Out << "->setVisibility("; - printVisibilityType(F->getVisibility()); - Out << ");"; - nl(Out); - } - if (F->hasGC()) { - printCppName(F); - Out << "->setGC(\"" << F->getGC() << "\");"; - nl(Out); - } - if (is_inline) { - Out << "}"; - nl(Out); - } - printAttributes(F->getAttributes(), getCppName(F)); + } + if (F->hasGC()) { printCppName(F); - Out << "->setAttributes(" << getCppName(F) << "_PAL);"; + Out << "->setGC(\"" << F->getGC() << "\");"; nl(Out); } + if (is_inline) { + Out << "}"; + nl(Out); + } + printAttributes(F->getAttributes(), getCppName(F)); + printCppName(F); + Out << "->setAttributes(" << getCppName(F) << "_PAL);"; + nl(Out); +} - void CppWriter::printFunctionBody(const Function *F) { - if (F->isDeclaration()) - return; // external functions have no bodies. - - // Clear the DefinedValues and ForwardRefs maps because we can't have - // cross-function forward refs - ForwardRefs.clear(); - DefinedValues.clear(); +void CppWriter::printFunctionBody(const Function *F) { + if (F->isDeclaration()) + return; // external functions have no bodies. - // Create all the argument values - if (!is_inline) { - if (!F->arg_empty()) { - Out << "Function::arg_iterator args = " << getCppName(F) - << "->arg_begin();"; - nl(Out); - } - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - Out << "Value* " << getCppName(AI) << " = args++;"; - nl(Out); - if (AI->hasName()) { - Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");"; - nl(Out); - } - } - } + // Clear the DefinedValues and ForwardRefs maps because we can't have + // cross-function forward refs + ForwardRefs.clear(); + DefinedValues.clear(); - // Create all the basic blocks - nl(Out); - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - std::string bbname(getCppName(BI)); - Out << "BasicBlock* " << bbname << - " = BasicBlock::Create(mod->getContext(), \""; - if (BI->hasName()) - printEscapedString(BI->getName()); - Out << "\"," << getCppName(BI->getParent()) << ",0);"; + // Create all the argument values + if (!is_inline) { + if (!F->arg_empty()) { + Out << "Function::arg_iterator args = " << getCppName(F) + << "->arg_begin();"; nl(Out); } - - // Output all of its basic blocks... 
for the function - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - std::string bbname(getCppName(BI)); - nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")"; + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + Out << "Value* " << getCppName(AI) << " = args++;"; nl(Out); - - // Output all of the instructions in the basic block... - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); - I != E; ++I) { - printInstruction(I,bbname); + if (AI->hasName()) { + Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");"; + nl(Out); } } + } - // Loop over the ForwardRefs and resolve them now that all instructions - // are generated. - if (!ForwardRefs.empty()) { - nl(Out) << "// Resolve Forward References"; - nl(Out); - } + // Create all the basic blocks + nl(Out); + for (Function::const_iterator BI = F->begin(), BE = F->end(); + BI != BE; ++BI) { + std::string bbname(getCppName(BI)); + Out << "BasicBlock* " << bbname << + " = BasicBlock::Create(mod->getContext(), \""; + if (BI->hasName()) + printEscapedString(BI->getName()); + Out << "\"," << getCppName(BI->getParent()) << ",0);"; + nl(Out); + } - while (!ForwardRefs.empty()) { - ForwardRefMap::iterator I = ForwardRefs.begin(); - Out << I->second << "->replaceAllUsesWith(" - << getCppName(I->first) << "); delete " << I->second << ";"; - nl(Out); - ForwardRefs.erase(I); + // Output all of its basic blocks... for the function + for (Function::const_iterator BI = F->begin(), BE = F->end(); + BI != BE; ++BI) { + std::string bbname(getCppName(BI)); + nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")"; + nl(Out); + + // Output all of the instructions in the basic block... + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); + I != E; ++I) { + printInstruction(I,bbname); } } - void CppWriter::printInline(const std::string& fname, - const std::string& func) { - const Function* F = TheModule->getFunction(func); - if (!F) { - error(std::string("Function '") + func + "' not found in input module"); - return; - } - if (F->isDeclaration()) { - error(std::string("Function '") + func + "' is external!"); - return; - } - nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *" - << getCppName(F); - unsigned arg_count = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - Out << ", Value* arg_" << arg_count; - } - Out << ") {"; + // Loop over the ForwardRefs and resolve them now that all instructions + // are generated. + if (!ForwardRefs.empty()) { + nl(Out) << "// Resolve Forward References"; nl(Out); - is_inline = true; - printFunctionUses(F); - printFunctionBody(F); - is_inline = false; - Out << "return " << getCppName(F->begin()) << ";"; - nl(Out) << "}"; + } + + while (!ForwardRefs.empty()) { + ForwardRefMap::iterator I = ForwardRefs.begin(); + Out << I->second << "->replaceAllUsesWith(" + << getCppName(I->first) << "); delete " << I->second << ";"; nl(Out); + ForwardRefs.erase(I); } +} - void CppWriter::printModuleBody() { - // Print out all the type definitions - nl(Out) << "// Type Definitions"; nl(Out); - printTypes(TheModule); - - // Functions can call each other and global variables can reference them so - // define all the functions first before emitting their function bodies. 
- nl(Out) << "// Function Declarations"; nl(Out); - for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); - I != E; ++I) - printFunctionHead(I); - - // Process the global variables declarations. We can't initialze them until - // after the constants are printed so just print a header for each global - nl(Out) << "// Global Variable Declarations\n"; nl(Out); - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - printVariableHead(I); - } +void CppWriter::printInline(const std::string& fname, + const std::string& func) { + const Function* F = TheModule->getFunction(func); + if (!F) { + error(std::string("Function '") + func + "' not found in input module"); + return; + } + if (F->isDeclaration()) { + error(std::string("Function '") + func + "' is external!"); + return; + } + nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *" + << getCppName(F); + unsigned arg_count = 1; + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + Out << ", Value* arg_" << arg_count; + } + Out << ") {"; + nl(Out); + is_inline = true; + printFunctionUses(F); + printFunctionBody(F); + is_inline = false; + Out << "return " << getCppName(F->begin()) << ";"; + nl(Out) << "}"; + nl(Out); +} - // Print out all the constants definitions. Constants don't recurse except - // through GlobalValues. All GlobalValues have been declared at this point - // so we can proceed to generate the constants. - nl(Out) << "// Constant Definitions"; nl(Out); - printConstants(TheModule); - - // Process the global variables definitions now that all the constants have - // been emitted. These definitions just couple the gvars with their constant - // initializers. - nl(Out) << "// Global Variable Definitions"; nl(Out); - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - printVariableBody(I); - } +void CppWriter::printModuleBody() { + // Print out all the type definitions + nl(Out) << "// Type Definitions"; nl(Out); + printTypes(TheModule); + + // Functions can call each other and global variables can reference them so + // define all the functions first before emitting their function bodies. + nl(Out) << "// Function Declarations"; nl(Out); + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) + printFunctionHead(I); + + // Process the global variables declarations. We can't initialze them until + // after the constants are printed so just print a header for each global + nl(Out) << "// Global Variable Declarations\n"; nl(Out); + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + printVariableHead(I); + } - // Finally, we can safely put out all of the function bodies. - nl(Out) << "// Function Definitions"; nl(Out); - for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); - I != E; ++I) { - if (!I->isDeclaration()) { - nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I) - << ")"; - nl(Out) << "{"; - nl(Out,1); - printFunctionBody(I); - nl(Out,-1) << "}"; - nl(Out); - } - } + // Print out all the constants definitions. Constants don't recurse except + // through GlobalValues. All GlobalValues have been declared at this point + // so we can proceed to generate the constants. 
+ nl(Out) << "// Constant Definitions"; nl(Out); + printConstants(TheModule); + + // Process the global variables definitions now that all the constants have + // been emitted. These definitions just couple the gvars with their constant + // initializers. + nl(Out) << "// Global Variable Definitions"; nl(Out); + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + printVariableBody(I); } - void CppWriter::printProgram(const std::string& fname, - const std::string& mName) { - Out << "#include <llvm/LLVMContext.h>\n"; - Out << "#include <llvm/Module.h>\n"; - Out << "#include <llvm/DerivedTypes.h>\n"; - Out << "#include <llvm/Constants.h>\n"; - Out << "#include <llvm/GlobalVariable.h>\n"; - Out << "#include <llvm/Function.h>\n"; - Out << "#include <llvm/CallingConv.h>\n"; - Out << "#include <llvm/BasicBlock.h>\n"; - Out << "#include <llvm/Instructions.h>\n"; - Out << "#include <llvm/InlineAsm.h>\n"; - Out << "#include <llvm/Support/FormattedStream.h>\n"; - Out << "#include <llvm/Support/MathExtras.h>\n"; - Out << "#include <llvm/Pass.h>\n"; - Out << "#include <llvm/PassManager.h>\n"; - Out << "#include <llvm/ADT/SmallVector.h>\n"; - Out << "#include <llvm/Analysis/Verifier.h>\n"; - Out << "#include <llvm/Assembly/PrintModulePass.h>\n"; - Out << "#include <algorithm>\n"; - Out << "using namespace llvm;\n\n"; - Out << "Module* " << fname << "();\n\n"; - Out << "int main(int argc, char**argv) {\n"; - Out << " Module* Mod = " << fname << "();\n"; - Out << " verifyModule(*Mod, PrintMessageAction);\n"; - Out << " PassManager PM;\n"; - Out << " PM.add(createPrintModulePass(&outs()));\n"; - Out << " PM.run(*Mod);\n"; - Out << " return 0;\n"; - Out << "}\n\n"; - printModule(fname,mName); - } - - void CppWriter::printModule(const std::string& fname, - const std::string& mName) { - nl(Out) << "Module* " << fname << "() {"; - nl(Out,1) << "// Module Construction"; - nl(Out) << "Module* mod = new Module(\""; - printEscapedString(mName); - Out << "\", getGlobalContext());"; - if (!TheModule->getTargetTriple().empty()) { - nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");"; - } - if (!TheModule->getTargetTriple().empty()) { - nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple() - << "\");"; + // Finally, we can safely put out all of the function bodies. 
+ nl(Out) << "// Function Definitions"; nl(Out); + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) { + if (!I->isDeclaration()) { + nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I) + << ")"; + nl(Out) << "{"; + nl(Out,1); + printFunctionBody(I); + nl(Out,-1) << "}"; + nl(Out); } + } +} - if (!TheModule->getModuleInlineAsm().empty()) { - nl(Out) << "mod->setModuleInlineAsm(\""; - printEscapedString(TheModule->getModuleInlineAsm()); - Out << "\");"; - } - nl(Out); +void CppWriter::printProgram(const std::string& fname, + const std::string& mName) { + Out << "#include <llvm/LLVMContext.h>\n"; + Out << "#include <llvm/Module.h>\n"; + Out << "#include <llvm/DerivedTypes.h>\n"; + Out << "#include <llvm/Constants.h>\n"; + Out << "#include <llvm/GlobalVariable.h>\n"; + Out << "#include <llvm/Function.h>\n"; + Out << "#include <llvm/CallingConv.h>\n"; + Out << "#include <llvm/BasicBlock.h>\n"; + Out << "#include <llvm/Instructions.h>\n"; + Out << "#include <llvm/InlineAsm.h>\n"; + Out << "#include <llvm/Support/FormattedStream.h>\n"; + Out << "#include <llvm/Support/MathExtras.h>\n"; + Out << "#include <llvm/Pass.h>\n"; + Out << "#include <llvm/PassManager.h>\n"; + Out << "#include <llvm/ADT/SmallVector.h>\n"; + Out << "#include <llvm/Analysis/Verifier.h>\n"; + Out << "#include <llvm/Assembly/PrintModulePass.h>\n"; + Out << "#include <algorithm>\n"; + Out << "using namespace llvm;\n\n"; + Out << "Module* " << fname << "();\n\n"; + Out << "int main(int argc, char**argv) {\n"; + Out << " Module* Mod = " << fname << "();\n"; + Out << " verifyModule(*Mod, PrintMessageAction);\n"; + Out << " PassManager PM;\n"; + Out << " PM.add(createPrintModulePass(&outs()));\n"; + Out << " PM.run(*Mod);\n"; + Out << " return 0;\n"; + Out << "}\n\n"; + printModule(fname,mName); +} - // Loop over the dependent libraries and emit them. - Module::lib_iterator LI = TheModule->lib_begin(); - Module::lib_iterator LE = TheModule->lib_end(); - while (LI != LE) { - Out << "mod->addLibrary(\"" << *LI << "\");"; - nl(Out); - ++LI; - } - printModuleBody(); - nl(Out) << "return mod;"; - nl(Out,-1) << "}"; +void CppWriter::printModule(const std::string& fname, + const std::string& mName) { + nl(Out) << "Module* " << fname << "() {"; + nl(Out,1) << "// Module Construction"; + nl(Out) << "Module* mod = new Module(\""; + printEscapedString(mName); + Out << "\", getGlobalContext());"; + if (!TheModule->getTargetTriple().empty()) { + nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");"; + } + if (!TheModule->getTargetTriple().empty()) { + nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple() + << "\");"; + } + + if (!TheModule->getModuleInlineAsm().empty()) { + nl(Out) << "mod->setModuleInlineAsm(\""; + printEscapedString(TheModule->getModuleInlineAsm()); + Out << "\");"; + } + nl(Out); + + // Loop over the dependent libraries and emit them. 
+ Module::lib_iterator LI = TheModule->lib_begin(); + Module::lib_iterator LE = TheModule->lib_end(); + while (LI != LE) { + Out << "mod->addLibrary(\"" << *LI << "\");"; nl(Out); + ++LI; } + printModuleBody(); + nl(Out) << "return mod;"; + nl(Out,-1) << "}"; + nl(Out); +} + +void CppWriter::printContents(const std::string& fname, + const std::string& mName) { + Out << "\nModule* " << fname << "(Module *mod) {\n"; + Out << "\nmod->setModuleIdentifier(\""; + printEscapedString(mName); + Out << "\");\n"; + printModuleBody(); + Out << "\nreturn mod;\n"; + Out << "\n}\n"; +} - void CppWriter::printContents(const std::string& fname, - const std::string& mName) { - Out << "\nModule* " << fname << "(Module *mod) {\n"; - Out << "\nmod->setModuleIdentifier(\""; - printEscapedString(mName); - Out << "\");\n"; - printModuleBody(); - Out << "\nreturn mod;\n"; - Out << "\n}\n"; +void CppWriter::printFunction(const std::string& fname, + const std::string& funcName) { + const Function* F = TheModule->getFunction(funcName); + if (!F) { + error(std::string("Function '") + funcName + "' not found in input module"); + return; } + Out << "\nFunction* " << fname << "(Module *mod) {\n"; + printFunctionUses(F); + printFunctionHead(F); + printFunctionBody(F); + Out << "return " << getCppName(F) << ";\n"; + Out << "}\n"; +} - void CppWriter::printFunction(const std::string& fname, - const std::string& funcName) { - const Function* F = TheModule->getFunction(funcName); - if (!F) { - error(std::string("Function '") + funcName + "' not found in input module"); - return; - } - Out << "\nFunction* " << fname << "(Module *mod) {\n"; - printFunctionUses(F); - printFunctionHead(F); - printFunctionBody(F); - Out << "return " << getCppName(F) << ";\n"; - Out << "}\n"; - } - - void CppWriter::printFunctions() { - const Module::FunctionListType &funcs = TheModule->getFunctionList(); - Module::const_iterator I = funcs.begin(); - Module::const_iterator IE = funcs.end(); - - for (; I != IE; ++I) { - const Function &func = *I; - if (!func.isDeclaration()) { - std::string name("define_"); - name += func.getName(); - printFunction(name, func.getName()); - } +void CppWriter::printFunctions() { + const Module::FunctionListType &funcs = TheModule->getFunctionList(); + Module::const_iterator I = funcs.begin(); + Module::const_iterator IE = funcs.end(); + + for (; I != IE; ++I) { + const Function &func = *I; + if (!func.isDeclaration()) { + std::string name("define_"); + name += func.getName(); + printFunction(name, func.getName()); } } +} - void CppWriter::printVariable(const std::string& fname, - const std::string& varName) { - const GlobalVariable* GV = TheModule->getNamedGlobal(varName); +void CppWriter::printVariable(const std::string& fname, + const std::string& varName) { + const GlobalVariable* GV = TheModule->getNamedGlobal(varName); - if (!GV) { - error(std::string("Variable '") + varName + "' not found in input module"); - return; - } - Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n"; - printVariableUses(GV); - printVariableHead(GV); - printVariableBody(GV); - Out << "return " << getCppName(GV) << ";\n"; - Out << "}\n"; - } - - void CppWriter::printType(const std::string& fname, - const std::string& typeName) { - const Type* Ty = TheModule->getTypeByName(typeName); - if (!Ty) { - error(std::string("Type '") + typeName + "' not found in input module"); - return; - } - Out << "\nType* " << fname << "(Module *mod) {\n"; - printType(Ty); - Out << "return " << getCppName(Ty) << ";\n"; - Out << "}\n"; - } - - bool 
CppWriter::runOnModule(Module &M) { - TheModule = &M; - - // Emit a header - Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n"; - - // Get the name of the function we're supposed to generate - std::string fname = FuncName.getValue(); - - // Get the name of the thing we are to generate - std::string tgtname = NameToGenerate.getValue(); - if (GenerationType == GenModule || - GenerationType == GenContents || - GenerationType == GenProgram || - GenerationType == GenFunctions) { - if (tgtname == "!bad!") { - if (M.getModuleIdentifier() == "-") - tgtname = "<stdin>"; - else - tgtname = M.getModuleIdentifier(); - } - } else if (tgtname == "!bad!") - error("You must use the -for option with -gen-{function,variable,type}"); - - switch (WhatToGenerate(GenerationType)) { - case GenProgram: - if (fname.empty()) - fname = "makeLLVMModule"; - printProgram(fname,tgtname); - break; - case GenModule: - if (fname.empty()) - fname = "makeLLVMModule"; - printModule(fname,tgtname); - break; - case GenContents: - if (fname.empty()) - fname = "makeLLVMModuleContents"; - printContents(fname,tgtname); - break; - case GenFunction: - if (fname.empty()) - fname = "makeLLVMFunction"; - printFunction(fname,tgtname); - break; - case GenFunctions: - printFunctions(); - break; - case GenInline: - if (fname.empty()) - fname = "makeLLVMInline"; - printInline(fname,tgtname); - break; - case GenVariable: - if (fname.empty()) - fname = "makeLLVMVariable"; - printVariable(fname,tgtname); - break; - case GenType: - if (fname.empty()) - fname = "makeLLVMType"; - printType(fname,tgtname); - break; - default: - error("Invalid generation option"); - } + if (!GV) { + error(std::string("Variable '") + varName + "' not found in input module"); + return; + } + Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n"; + printVariableUses(GV); + printVariableHead(GV); + printVariableBody(GV); + Out << "return " << getCppName(GV) << ";\n"; + Out << "}\n"; +} - return false; +void CppWriter::printType(const std::string& fname, + const std::string& typeName) { + const Type* Ty = TheModule->getTypeByName(typeName); + if (!Ty) { + error(std::string("Type '") + typeName + "' not found in input module"); + return; } + Out << "\nType* " << fname << "(Module *mod) {\n"; + printType(Ty); + Out << "return " << getCppName(Ty) << ";\n"; + Out << "}\n"; +} + +bool CppWriter::runOnModule(Module &M) { + TheModule = &M; + + // Emit a header + Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n"; + + // Get the name of the function we're supposed to generate + std::string fname = FuncName.getValue(); + + // Get the name of the thing we are to generate + std::string tgtname = NameToGenerate.getValue(); + if (GenerationType == GenModule || + GenerationType == GenContents || + GenerationType == GenProgram || + GenerationType == GenFunctions) { + if (tgtname == "!bad!") { + if (M.getModuleIdentifier() == "-") + tgtname = "<stdin>"; + else + tgtname = M.getModuleIdentifier(); + } + } else if (tgtname == "!bad!") + error("You must use the -for option with -gen-{function,variable,type}"); + + switch (WhatToGenerate(GenerationType)) { + case GenProgram: + if (fname.empty()) + fname = "makeLLVMModule"; + printProgram(fname,tgtname); + break; + case GenModule: + if (fname.empty()) + fname = "makeLLVMModule"; + printModule(fname,tgtname); + break; + case GenContents: + if (fname.empty()) + fname = "makeLLVMModuleContents"; + printContents(fname,tgtname); + break; + case GenFunction: + if (fname.empty()) + fname = "makeLLVMFunction"; + 
printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
}

char CppWriter::ID = 0;
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index e42e9b3..b6e4d65 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -145,8 +145,9 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg());
- if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+ if (MBlaze::CPURegsRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
}

diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 23889b1..1730b68 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -234,6 +234,24 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &R = F->getRegInfo();
MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by transferring all successors and
+ // remaining instructions from the current block to the new block which
+ // will contain the Phi node for the select.
+ finish->splice(finish->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ finish->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(loop);
+ BB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);

unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
@@ -249,26 +267,6 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(IAMT)
.addMBB(finish);

- F->insert(It, loop);
- F->insert(It, finish);
-
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- finish->addSuccessor(*i);
-
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors. 
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
- BB->addSuccessor(loop);
- BB->addSuccessor(finish);
-
- // Next, add the finish block as a successor of the loop block
- loop->addSuccessor(finish);
- loop->addSuccessor(loop);
-
unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
@@ -298,12 +296,13 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(NAMT)
.addMBB(loop);

- BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
.addReg(IVAL).addMBB(BB)
.addReg(NDST).addMBB(loop);

// The pseudo instruction is no longer needed so remove it
- F->DeleteMachineInstr(MI);
+ MI->eraseFromParent();

return finish;
}

@@ -338,27 +337,23 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
}

- BuildMI(BB, dl, TII->get(Opc))
- .addReg(MI->getOperand(3).getReg())
- .addMBB(dneBB);
-
F->insert(It, flsBB);
F->insert(It, dneBB);

- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- dneBB->addSuccessor(*i);
+ // Transfer the remainder of BB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(BB);

- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
BB->addSuccessor(flsBB);
BB->addSuccessor(dneBB);
flsBB->addSuccessor(dneBB);

+ BuildMI(BB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
@@ -366,11 +361,12 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
// .addReg(MI->getOperand(2).getReg()).addMBB(BB);

- BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
.addReg(MI->getOperand(1).getReg()).addMBB(BB);

- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now. 
return dneBB; } } @@ -408,7 +404,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); } @@ -439,10 +435,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue MBlazeTargetLowering:: LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; - EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); const Constant *C = N->getConstVal(); - SDValue Zero = DAG.getConstant(0, PtrVT); DebugLoc dl = Op.getDebugLoc(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), @@ -531,6 +525,7 @@ SDValue MBlazeTargetLowering:: LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -562,7 +557,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -590,7 +585,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // Create the frame index object for this incoming parameter LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, - LastArgStackLoc, true, false); + LastArgStackLoc, true); SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); @@ -623,7 +618,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // node so that legalize doesn't hack it. unsigned char OpFlag = MBlazeII::MO_NO_FLAG; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), @@ -779,7 +774,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. 
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); + int FI = MFI->CreateFixedObject(ArgSize, 0, true); MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+ (FirstStackArgLoc + VA.getLocMemOffset()))); @@ -810,7 +805,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, unsigned LiveReg = MF.addLiveIn(Reg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); - int FI = MFI->CreateFixedObject(4, 0, true, false); + int FI = MFI->CreateFixedObject(4, 0, true); MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, @@ -841,6 +836,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, SDValue MBlazeTargetLowering:: LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location @@ -869,7 +865,7 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 9f9ac89..5ec2563 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -109,6 +109,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -117,6 +118,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual MachineBasicBlock * diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 4c4d86b..6ff5825 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -110,15 +110,13 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { BuildMI(MBB, MI, DL, get(MBlaze::NOP)); } -bool MBlazeInstrInfo:: -copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { +void MBlazeInstrInfo:: +copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg) - .addReg(SrcReg).addReg(MBlaze::R0); - return true; + .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0); } void MBlazeInstrInfo:: @@ -141,54 +139,17 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(0).addFrameIndex(FI); } -MachineInstr *MBlazeInstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - MachineInstr *NewMI = NULL; - - switch 
(MI->getOpcode()) { - case MBlaze::OR: - case MBlaze::ADD: - if ((MI->getOperand(0).isReg()) && - (MI->getOperand(2).isReg()) && - (MI->getOperand(2).getReg() == MBlaze::R0) && - (MI->getOperand(1).isReg())) { - if (Ops[0] == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } - } - break; - } - - return NewMI; -} - //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// unsigned MBlazeInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Can only insert uncond branches so far. assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); - BuildMI(&MBB, DebugLoc(), get(MBlaze::BRI)).addMBB(TBB); + BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB); return 1; } @@ -209,12 +170,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20, - MBlaze::CPURegsRegisterClass, - MBlaze::CPURegsRegisterClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global base register!"); - Ok = Ok; // Silence warning when assertions are turned off. 
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalBaseReg).addReg(MBlaze::R20); RegInfo.addLiveIn(MBlaze::R20); MBlazeFI->setGlobalBaseReg(GlobalBaseReg); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index c9fdc88..f074370 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -198,13 +198,12 @@ public: /// Branch Analysis virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -217,18 +216,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index f15eea9..8cafa8c 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -148,22 +148,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -/// MBlaze Callee Saved Register Classes -const TargetRegisterClass* const* MBlazeRegisterInfo:: -getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRC[] = { - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - 0 - }; - - return CalleeSavedRC; -} - BitVector MBlazeRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index b618bf4..af97b0e 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -54,9 +54,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction* MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 3de173c..8f97d25 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -808,7 +808,7 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { std::string Name; switch (Inst->getIntrinsicID()) { case Intrinsic::vastart: - Name = getValueName(Inst->getOperand(1)); + Name = getValueName(Inst->getArgOperand(0)); Name.insert(Name.length()-1,"$valist"); // Obtain the argument handle. printSimpleInstruction("ldloca",Name.c_str()); @@ -817,20 +817,20 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { "instance void [mscorlib]System.ArgIterator::.ctor" "(valuetype [mscorlib]System.RuntimeArgumentHandle)"); // Save as pointer type "void*" - printValueLoad(Inst->getOperand(1)); + printValueLoad(Inst->getArgOperand(0)); printSimpleInstruction("ldloca",Name.c_str()); printIndirectSave(PointerType::getUnqual( IntegerType::get(Inst->getContext(), 8))); break; case Intrinsic::vaend: // Close argument list handle. - printIndirectLoad(Inst->getOperand(1)); + printIndirectLoad(Inst->getArgOperand(0)); printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()"); break; case Intrinsic::vacopy: // Copy "ArgIterator" valuetype. - printIndirectLoad(Inst->getOperand(1)); - printIndirectLoad(Inst->getOperand(2)); + printIndirectLoad(Inst->getArgOperand(0)); + printIndirectLoad(Inst->getArgOperand(1)); printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator"); break; default: @@ -845,10 +845,11 @@ void MSILWriter::printCallInstruction(const Instruction* Inst) { // Handle intrinsic function. printIntrinsicCall(cast<IntrinsicInst>(Inst)); } else { + const CallInst *CI = cast<CallInst>(Inst); // Load arguments to stack and call function. 
- for (int I = 1, E = Inst->getNumOperands(); I!=E; ++I) - printValueLoad(Inst->getOperand(I)); - printFunctionCall(Inst->getOperand(0),Inst); + for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I) + printValueLoad(CI->getArgOperand(I)); + printFunctionCall(CI->getCalledFunction(), Inst); } } @@ -1002,8 +1003,8 @@ void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) { std::string Label = "leave$normal_"+utostr(getUniqID()); Out << ".try {\n"; // Load arguments - for (int I = 3, E = Inst->getNumOperands(); I!=E; ++I) - printValueLoad(Inst->getOperand(I)); + for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I) + printValueLoad(Inst->getArgOperand(I)); // Print call instruction printFunctionCall(Inst->getOperand(0),Inst); // Save function result and leave "try" block @@ -1280,7 +1281,7 @@ void MSILWriter::printLocalVariables(const Function& F) { case Intrinsic::vaend: case Intrinsic::vacopy: isVaList = true; - VaList = Inst->getOperand(1); + VaList = Inst->getArgOperand(0); break; default: isVaList = false; diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 7b328bb..3395e9f 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -272,7 +272,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, AM.Base.Reg; if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp, + Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(), + MVT::i16, AM.Disp, 0/*AM.SymbolFlags*/); else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16, diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 403400e..a1703a3 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -278,6 +278,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -290,7 +291,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); case CallingConv::MSP430_INTR: report_fatal_error("ISRs cannot be called directly"); return SDValue(); @@ -369,7 +370,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, << "\n"; } // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -387,6 +388,7 @@ SDValue MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location @@ -421,7 +423,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // Guarantee that all emitted copies are stuck together, // avoiding something bad. 
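
Note on the OutVals migration visible above and in the hunks that follow: across this commit the LowerCall/LowerReturn hooks (and helpers such as LowerCCCCallTo) gain a separate SmallVectorImpl<SDValue> &OutVals parameter, and the value that used to be carried as Outs[i].Val is now read from OutVals[i], while Outs keeps only the per-argument metadata such as Flags. A minimal sketch of the resulting loop shape, assuming the usual CCState/ArgLocs setup from the surrounding code (Flag here is the ordinary glue value, default-initialized purely for illustration):

    SDValue Flag;
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      SDValue Arg = OutVals[i];      // was Outs[i].Val before this commit;
                                     // Outs[i].Flags still holds the metadata
      if (VA.isRegLoc())             // register-assigned argument
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    }

The two vectors stay index-aligned, which is why each target below can switch to OutVals[i] with no other bookkeeping.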
@@ -447,6 +449,7 @@
MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -471,7 +474,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];

- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];

// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -529,7 +532,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
@@ -642,7 +645,8 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();

// Create the TargetGlobalAddress node, folding in the constant offset.
- SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ getPointerTy(), Offset);
return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
getPointerTy(), Result);
}
@@ -888,7 +892,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- true, false);
+ true);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -1070,7 +1074,10 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,

// Update machine-CFG edges by transferring all successors of the current
// block to the block containing instructions after shift.
- RemBB->transferSuccessors(BB);
+ RemBB->splice(RemBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);

// Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
BB->addSuccessor(LoopBB);
@@ -1116,11 +1123,11 @@

// RemBB:
// DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
- BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg)
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
.addReg(SrcReg).addMBB(BB)
.addReg(ShiftReg2).addMBB(LoopBB);

- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return RemBB;
}

@@ -1158,18 +1165,22 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII.get(MSP430::JCC))
- .addMBB(copy1MBB)
- .addImm(MI->getOperand(3).getImm());
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);

// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select. 
- copy1MBB->transferSuccessors(BB); + copy1MBB->splice(copy1MBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + copy1MBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(copy1MBB); + BuildMI(BB, dl, TII.get(MSP430::JCC)) + .addMBB(copy1MBB) + .addImm(MI->getOperand(3).getImm()); + // copy0MBB: // %FalseValue = ... // # fallthrough to copy1MBB @@ -1182,11 +1193,11 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = copy1MBB; - BuildMI(BB, dl, TII.get(MSP430::PHI), + BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 01c5071..673c543 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -127,6 +127,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -155,6 +156,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -163,6 +165,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 18226ab..df28d07 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -83,27 +83,20 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Cannot store this register to stack slot!"); } -bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC == SrcRC) { - unsigned Opc; - if (DestRC == &MSP430::GR16RegClass) { - Opc = MSP430::MOV16rr; - } else if (DestRC == &MSP430::GR8RegClass) { - Opc = MSP430::MOV8rr; - } else { - return false; - } - - BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg); - return true; - } +void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (MSP430::GR16RegClass.contains(DestReg, SrcReg)) + Opc = MSP430::MOV16rr; + else if (MSP430::GR8RegClass.contains(DestReg, SrcReg)) + Opc = MSP430::MOV8rr; + else + llvm_unreachable("Impossible reg-to-reg copy"); - return false; + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } 
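
Note on the copyRegToReg to copyPhysReg migration just completed above (and in the MBlaze hunks earlier): the old hook returned bool and could decline a copy, while the new one receives the DebugLoc and a KillSrc flag, picks an opcode from the physical register classes, and must succeed; an unsupported pair is a hard error. A hedged sketch of the template, with Foo standing in for a target (the class and opcode names mirror the MSP430 hunk above, not a promise about any other backend):

    void FooInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
      unsigned Opc;
      if (Foo::GR16RegClass.contains(DestReg, SrcReg))  // both physregs in class
        Opc = Foo::MOV16rr;
      else if (Foo::GR8RegClass.contains(DestReg, SrcReg))
        Opc = Foo::MOV8rr;
      else
        llvm_unreachable("Impossible reg-to-reg copy"); // no 'return false' path
      BuildMI(MBB, I, DL, get(Opc), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));    // forward the kill flag
    }

Callers that used to branch on the bool result (see the assert(Ok) removal in MBlazeInstrInfo::getGlobalBaseReg above) can instead emit a TargetOpcode::COPY and let the target expand it.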
bool @@ -330,10 +323,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc DL; - + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 842b4cb..ebbda1a 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -49,11 +49,10 @@ public: /// virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; } - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, @@ -93,7 +92,8 @@ public: unsigned RemoveBranch(MachineBasicBlock &MBB) const; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; }; diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 6b9a2f2..8792b22 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -25,13 +25,16 @@ class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>; def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>; def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; -def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; +def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>; -def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, +def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>; +def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisI8<2>]>; //===----------------------------------------------------------------------===// // MSP430 Specific Node Definitions. 
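
Note on the custom-inserter rewrites above (MBlaze shift and select_cc, MSP430 EmitShiftInstr and select): all of them move to the same recipe. The tail of the current block after the pseudo is spliced into the join block, transferSuccessorsAndUpdatePHIs moves the successor edges and rewrites PHIs in the old successors, the result PHI is built at the join block's begin(), and the pseudo is dropped with MI->eraseFromParent() rather than F->DeleteMachineInstr(MI). A condensed sketch, with SinkMBB, TrueReg, and Foo::PHI as illustrative stand-ins for the per-target names (finish/dneBB/RemBB/copy1MBB above):

    MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, SinkMBB);
    SinkMBB->splice(SinkMBB->begin(), BB,          // move everything after MI
                    llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
    SinkMBB->transferSuccessorsAndUpdatePHIs(BB);  // edges plus PHI operands
    BB->addSuccessor(SinkMBB);                     // re-add the edge we want
    BuildMI(*SinkMBB, SinkMBB->begin(), dl,        // PHI must lead the block
            TII->get(Foo::PHI), MI->getOperand(0).getReg())
        .addReg(TrueReg).addMBB(BB);
    MI->eraseFromParent();                         // pseudo no longer needed

This also explains why each hunk moves the conditional-branch BuildMI after the splice: a branch appended to BB before the splice would sit after MI and be carried into the join block with the rest of the tail.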
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 842b4cb..ebbda1a 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -49,11 +49,10 @@ public:
   ///
   virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
 
-  bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                    unsigned DestReg, unsigned SrcReg,
-                    const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC,
-                    DebugLoc DL) const;
+  void copyPhysReg(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator I, DebugLoc DL,
+                   unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const;
 
   bool isMoveInstr(const MachineInstr& MI,
                    unsigned &SrcReg, unsigned &DstReg,
@@ -93,7 +92,8 @@ public:
   unsigned RemoveBranch(MachineBasicBlock &MBB) const;
   unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB,
-                        const SmallVectorImpl<MachineOperand> &Cond) const;
+                        const SmallVectorImpl<MachineOperand> &Cond,
+                        DebugLoc DL) const;
 };
 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 6b9a2f2..8792b22 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -25,13 +25,16 @@ class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
 def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
 def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
-def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+                                             SDTCisPtrTy<0>]>;
 def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
 def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
                                           SDTCisVT<1, i8>]>;
-def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+                                              SDTCisSameAs<1, 2>,
                                               SDTCisVT<3, i8>]>;
-def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+                                           SDTCisI8<2>]>;
 
 //===----------------------------------------------------------------------===//
 // MSP430 Specific Node Definitions.
@@ -46,7 +49,7 @@ def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
 def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
 
 def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
-                     [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+                     [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
 def MSP430callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
                         [SDNPHasChain, SDNPOutFlag]>;
@@ -55,8 +58,10 @@ def MSP430callseq_end :
                  [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
 def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
-def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
-def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+                        [SDNPHasChain, SDNPInFlag]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+                           [SDNPInFlag]>;
 def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
 def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
 def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
@@ -117,14 +122,14 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
 }
 
 let usesCustomInserter = 1 in {
-  def Select8  : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc),
+  def Select8  : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
                         "# Select8 PSEUDO",
                         [(set GR8:$dst,
-                          (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>;
-  def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc),
+                          (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>;
+  def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc),
                         "# Select16 PSEUDO",
                         [(set GR16:$dst,
-                          (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+                          (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
   let Defs = [SRW] in {
   def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
                     "# Shl8 PSEUDO",
@@ -330,60 +335,60 @@ def MOV16mm : I16mm<0x0,
 //===----------------------------------------------------------------------===//
 // Arithmetic Instructions
 
-let isTwoAddress = 1 in {
+let Constraints = "$src = $dst" in {
 
 let Defs = [SRW] in {
 
 let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
 def ADD8rr : I8rr<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                   "add.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+                  [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
                    (implicit SRW)]>;
 def ADD16rr : I16rr<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                     "add.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+                    [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
                      (implicit SRW)]>;
 }
 
 def ADD8rm : I8rm<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                   "add.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+                  [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
                    (implicit SRW)]>;
 def ADD16rm : I16rm<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                     "add.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+                    [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
                      (implicit SRW)]>;
 
 let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
 
 def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
                          (outs GR8:$dst, GR16:$base_wb),
-                         (ins GR8:$src1, GR16:$base),
+                         (ins GR8:$src, GR16:$base),
                          "add.b\t{@$base+, $dst}", []>;
 def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
                            (outs GR16:$dst, GR16:$base_wb),
-                           (ins GR16:$src1, GR16:$base),
+                           (ins GR16:$src, GR16:$base),
                            "add.w\t{@$base+, $dst}", []>;
 }
 
 def ADD8ri : I8ri<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
                   "add.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+                  [(set GR8:$dst, (add GR8:$src, imm:$src2)),
                    (implicit SRW)]>;
 def ADD16ri : I16ri<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
                     "add.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+                    [(set GR16:$dst, (add GR16:$src, imm:$src2)),
                      (implicit SRW)]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def ADD8mr : I8mr<0x0,
                   (outs), (ins memdst:$dst, GR8:$src),
                   "add.b\t{$src, $dst}",
@@ -424,40 +429,40 @@ let Uses = [SRW] in {
 
 let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
 def ADC8rr : I8rr<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                   "addc.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)),
+                  [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
                    (implicit SRW)]>;
 def ADC16rr : I16rr<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                     "addc.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)),
+                    [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
                      (implicit SRW)]>;
 } // isCommutable
 
 def ADC8ri : I8ri<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
                   "addc.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (adde GR8:$src1, imm:$src2)),
+                  [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
                    (implicit SRW)]>;
def ADC16ri : I16ri<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
                     "addc.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (adde GR16:$src1, imm:$src2)),
+                    [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
                      (implicit SRW)]>;
 def ADC8rm : I8rm<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                   "addc.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))),
+                  [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
                    (implicit SRW)]>;
 def ADC16rm : I16rm<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                     "addc.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))),
+                    [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
                      (implicit SRW)]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def ADC8mr : I8mr<0x0,
                   (outs), (ins memdst:$dst, GR8:$src),
                   "addc.b\t{$src, $dst}",
@@ -498,52 +503,52 @@ def ADC16mm : I8mm<0x0,
 
 let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
 def AND8rr : I8rr<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                   "and.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
+                  [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
                    (implicit SRW)]>;
 def AND16rr : I16rr<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                     "and.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
+                    [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
                      (implicit SRW)]>;
 }
 
 def AND8ri : I8ri<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
                   "and.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
+                  [(set GR8:$dst, (and GR8:$src, imm:$src2)),
                    (implicit SRW)]>;
 def AND16ri : I16ri<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
                     "and.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
+                    [(set GR16:$dst, (and GR16:$src, imm:$src2)),
                      (implicit SRW)]>;
 def AND8rm : I8rm<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                   "and.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))),
+                  [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
                    (implicit SRW)]>;
 def AND16rm : I16rm<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                     "and.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))),
+                    [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
                      (implicit SRW)]>;
 
 let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
 
 def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
                          (outs GR8:$dst, GR16:$base_wb),
-                         (ins GR8:$src1, GR16:$base),
+                         (ins GR8:$src, GR16:$base),
                          "and.b\t{@$base+, $dst}", []>;
 def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
                            (outs GR16:$dst, GR16:$base_wb),
-                           (ins GR16:$src1, GR16:$base),
+                           (ins GR16:$src, GR16:$base),
                            "and.w\t{@$base+, $dst}", []>;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def AND8mr : I8mr<0x0,
                   (outs), (ins memdst:$dst, GR8:$src),
                   "and.b\t{$src, $dst}",
@@ -582,46 +587,46 @@ def AND16mm : I16mm<0x0,
 
 let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
 def OR8rr : I8rr<0x0,
-                 (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                 (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                  "bis.b\t{$src2, $dst}",
-                 [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+                 [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
 def OR16rr : I16rr<0x0,
-                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                   (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                    "bis.w\t{$src2, $dst}",
-                   [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
+                   [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
 }
 
 def OR8ri : I8ri<0x0,
-                 (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                 (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
                  "bis.b\t{$src2, $dst}",
-                 [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+                 [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
 def OR16ri : I16ri<0x0,
-                   (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                   (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
                    "bis.w\t{$src2, $dst}",
-                   [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
+                   [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
 def OR8rm : I8rm<0x0,
-                 (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                 (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                  "bis.b\t{$src2, $dst}",
-                 [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+                 [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
 def OR16rm : I16rm<0x0,
-                   (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                   (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                    "bis.w\t{$src2, $dst}",
-                   [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
+                   [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
 
 let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
 
 def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
                         (outs GR8:$dst, GR16:$base_wb),
-                        (ins GR8:$src1, GR16:$base),
+                        (ins GR8:$src, GR16:$base),
                         "bis.b\t{@$base+, $dst}", []>;
 def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
                           (outs GR16:$dst, GR16:$base_wb),
-                          (ins GR16:$src1, GR16:$base),
+                          (ins GR16:$src, GR16:$base),
                           "bis.w\t{@$base+, $dst}", []>;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def OR8mr : I8mr<0x0,
                  (outs), (ins memdst:$dst, GR8:$src),
                  "bis.b\t{$src, $dst}",
@@ -654,24 +659,24 @@ def OR16mm : I16mm<0x0,
 
 // bic does not modify condition codes
 def BIC8rr : I8rr<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                   "bic.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>;
+                  [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
 def BIC16rr : I16rr<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                     "bic.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>;
+                    [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
 def BIC8rm : I8rm<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                   "bic.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>;
+                  [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
 def BIC16rm : I16rm<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                     "bic.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>;
+                    [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def BIC8mr : I8mr<0x0,
                   (outs), (ins memdst:$dst, GR8:$src),
                   "bic.b\t{$src, $dst}",
@@ -695,52 +700,52 @@ def BIC16mm : I16mm<0x0,
 
 let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
 def XOR8rr : I8rr<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                   "xor.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
+                  [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
                    (implicit SRW)]>;
 def XOR16rr : I16rr<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                     "xor.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
+                    [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
                      (implicit SRW)]>;
 }
 
 def XOR8ri : I8ri<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
                   "xor.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
+                  [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
                    (implicit SRW)]>;
 def XOR16ri : I16ri<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
                     "xor.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
+                    [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
                      (implicit SRW)]>;
 def XOR8rm : I8rm<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                   "xor.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
+                  [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
                    (implicit SRW)]>;
 def XOR16rm : I16rm<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                     "xor.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
+                    [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
                      (implicit SRW)]>;
 
 let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
 
 def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
                          (outs GR8:$dst, GR16:$base_wb),
-                         (ins GR8:$src1, GR16:$base),
+                         (ins GR8:$src, GR16:$base),
                          "xor.b\t{@$base+, $dst}", []>;
 def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
                            (outs GR16:$dst, GR16:$base_wb),
-                           (ins GR16:$src1, GR16:$base),
+                           (ins GR16:$src, GR16:$base),
                            "xor.w\t{@$base+, $dst}", []>;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def XOR8mr : I8mr<0x0,
                   (outs), (ins memdst:$dst, GR8:$src),
                   "xor.b\t{$src, $dst}",
@@ -777,51 +782,51 @@ def XOR16mm : I16mm<0x0,
 
 def SUB8rr : I8rr<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                   "sub.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+                  [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
                    (implicit SRW)]>;
 def SUB16rr : I16rr<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                     "sub.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+                    [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
                      (implicit SRW)]>;
 
 def SUB8ri : I8ri<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
                   "sub.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+                  [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
                    (implicit SRW)]>;
 def SUB16ri : I16ri<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
                     "sub.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+                    [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
                      (implicit SRW)]>;
 
 def SUB8rm : I8rm<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                   "sub.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+                  [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
                    (implicit SRW)]>;
 def SUB16rm : I16rm<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                     "sub.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+                    [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
                      (implicit SRW)]>;
 
 let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
 
 def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
                          (outs GR8:$dst, GR16:$base_wb),
-                         (ins GR8:$src1, GR16:$base),
+                         (ins GR8:$src, GR16:$base),
                          "sub.b\t{@$base+, $dst}", []>;
 def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
                            (outs GR16:$dst, GR16:$base_wb),
-                           (ins GR16:$src1, GR16:$base),
+                           (ins GR16:$src, GR16:$base),
                            "sub.w\t{@$base+, $dst}", []>;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def SUB8mr : I8mr<0x0,
                   (outs), (ins memdst:$dst, GR8:$src),
                   "sub.b\t{$src, $dst}",
@@ -860,39 +865,39 @@ def SUB16mm : I16mm<0x0,
 
 let Uses = [SRW] in {
 def SBC8rr : I8rr<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
                   "subc.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (sube GR8:$src1, GR8:$src2)),
+                  [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
                    (implicit SRW)]>;
 def SBC16rr : I16rr<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
                     "subc.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)),
+                    [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
                      (implicit SRW)]>;
 
 def SBC8ri : I8ri<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
                   "subc.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (sube GR8:$src1, imm:$src2)),
+                  [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
                    (implicit SRW)]>;
 def SBC16ri : I16ri<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
                     "subc.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (sube GR16:$src1, imm:$src2)),
+                    [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
                      (implicit SRW)]>;
 
 def SBC8rm : I8rm<0x0,
-                  (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+                  (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
                   "subc.b\t{$src2, $dst}",
-                  [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))),
+                  [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
                    (implicit SRW)]>;
 def SBC16rm : I16rm<0x0,
-                    (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+                    (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
                     "subc.w\t{$src2, $dst}",
-                    [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))),
+                    [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
                      (implicit SRW)]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
 def SBC8mr : I8mr<0x0,
                   (outs), (ins memdst:$dst, GR8:$src),
                   "subc.b\t{$src, $dst}",
@@ -985,59 +990,59 @@ def SWPB16r : II16r<0x0,
                     "swpb\t$dst",
                     [(set GR16:$dst, (bswap GR16:$src))]>;
 
-} // isTwoAddress = 1
+} // Constraints = "$src = $dst"
 
 // Integer comparisons
 let Defs = [SRW] in {
 def CMP8rr : I8rr<0x0,
-                  (outs), (ins GR8:$src1, GR8:$src2),
-                  "cmp.b\t{$src2, $src1}",
-                  [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>;
+                  (outs), (ins GR8:$src, GR8:$src2),
+                  "cmp.b\t{$src2, $src}",
+                  [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
 def CMP16rr : I16rr<0x0,
-                    (outs), (ins GR16:$src1, GR16:$src2),
-                    "cmp.w\t{$src2, $src1}",
-                    [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>;
+                    (outs), (ins GR16:$src, GR16:$src2),
+                    "cmp.w\t{$src2, $src}",
+                    [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
 
 def CMP8ri : I8ri<0x0,
-                  (outs), (ins GR8:$src1, i8imm:$src2),
-                  "cmp.b\t{$src2, $src1}",
-                  [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>;
+                  (outs), (ins GR8:$src, i8imm:$src2),
+                  "cmp.b\t{$src2, $src}",
+                  [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
 def CMP16ri : I16ri<0x0,
-                    (outs), (ins GR16:$src1, i16imm:$src2),
-                    "cmp.w\t{$src2, $src1}",
-                    [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>;
+                    (outs), (ins GR16:$src, i16imm:$src2),
+                    "cmp.w\t{$src2, $src}",
+                    [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
 
 def CMP8mi : I8mi<0x0,
-                  (outs), (ins memsrc:$src1, i8imm:$src2),
-                  "cmp.b\t{$src2, $src1}",
-                  [(MSP430cmp (load addr:$src1),
+                  (outs), (ins memsrc:$src, i8imm:$src2),
+                  "cmp.b\t{$src2, $src}",
+                  [(MSP430cmp (load addr:$src),
                               (i8 imm:$src2)), (implicit SRW)]>;
 def CMP16mi : I16mi<0x0,
-                    (outs), (ins memsrc:$src1, i16imm:$src2),
-                    "cmp.w\t{$src2, $src1}",
-                    [(MSP430cmp (load addr:$src1),
+                    (outs), (ins memsrc:$src, i16imm:$src2),
+                    "cmp.w\t{$src2, $src}",
+                    [(MSP430cmp (load addr:$src),
                                 (i16 imm:$src2)), (implicit SRW)]>;
 
 def CMP8rm : I8rm<0x0,
-                  (outs), (ins GR8:$src1, memsrc:$src2),
-                  "cmp.b\t{$src2, $src1}",
-                  [(MSP430cmp GR8:$src1, (load addr:$src2)),
+                  (outs), (ins GR8:$src, memsrc:$src2),
+                  "cmp.b\t{$src2, $src}",
+                  [(MSP430cmp GR8:$src, (load addr:$src2)),
                    (implicit SRW)]>;
 def CMP16rm : I16rm<0x0,
-                    (outs), (ins GR16:$src1, memsrc:$src2),
-                    "cmp.w\t{$src2, $src1}",
-                    [(MSP430cmp GR16:$src1, (load addr:$src2)),
+                    (outs), (ins GR16:$src, memsrc:$src2),
+                    "cmp.w\t{$src2, $src}",
+                    [(MSP430cmp GR16:$src, (load addr:$src2)),
                      (implicit SRW)]>;
 
 def CMP8mr : I8mr<0x0,
-                  (outs), (ins memsrc:$src1, GR8:$src2),
-                  "cmp.b\t{$src2, $src1}",
-                  [(MSP430cmp (load addr:$src1), GR8:$src2),
+                  (outs), (ins memsrc:$src, GR8:$src2),
+                  "cmp.b\t{$src2, $src}",
+                  [(MSP430cmp (load addr:$src), GR8:$src2),
                    (implicit SRW)]>;
 def CMP16mr : I16mr<0x0,
-                    (outs), (ins memsrc:$src1, GR16:$src2),
-                    "cmp.w\t{$src2, $src1}",
-                    [(MSP430cmp (load addr:$src1), GR16:$src2),
+                    (outs), (ins memsrc:$src, GR16:$src2),
+                    "cmp.w\t{$src2, $src}",
+                    [(MSP430cmp (load addr:$src), GR16:$src2),
                      (implicit SRW)]>;
 
@@ -1045,71 +1050,71 @@ def CMP16mr : I16mr<0x0,
 // Note that the C condition is set differently than when using CMP.
 let isCommutable = 1 in {
 def BIT8rr : I8rr<0x0,
-                  (outs), (ins GR8:$src1, GR8:$src2),
-                  "bit.b\t{$src2, $src1}",
-                  [(MSP430cmp (and_su GR8:$src1, GR8:$src2), 0),
+                  (outs), (ins GR8:$src, GR8:$src2),
+                  "bit.b\t{$src2, $src}",
+                  [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
                    (implicit SRW)]>;
 def BIT16rr : I16rr<0x0,
-                    (outs), (ins GR16:$src1, GR16:$src2),
-                    "bit.w\t{$src2, $src1}",
-                    [(MSP430cmp (and_su GR16:$src1, GR16:$src2), 0),
+                    (outs), (ins GR16:$src, GR16:$src2),
+                    "bit.w\t{$src2, $src}",
+                    [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
                      (implicit SRW)]>;
 }
 def BIT8ri : I8ri<0x0,
-                  (outs), (ins GR8:$src1, i8imm:$src2),
-                  "bit.b\t{$src2, $src1}",
-                  [(MSP430cmp (and_su GR8:$src1, imm:$src2), 0),
+                  (outs), (ins GR8:$src, i8imm:$src2),
+                  "bit.b\t{$src2, $src}",
+                  [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
                    (implicit SRW)]>;
 def BIT16ri : I16ri<0x0,
-                    (outs), (ins GR16:$src1, i16imm:$src2),
-                    "bit.w\t{$src2, $src1}",
-                    [(MSP430cmp (and_su GR16:$src1, imm:$src2), 0),
+                    (outs), (ins GR16:$src, i16imm:$src2),
+                    "bit.w\t{$src2, $src}",
+                    [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
                      (implicit SRW)]>;
 
 def BIT8rm : I8rm<0x0,
-                  (outs), (ins GR8:$src1, memdst:$src2),
-                  "bit.b\t{$src2, $src1}",
-                  [(MSP430cmp (and_su GR8:$src1, (load addr:$src2)), 0),
+                  (outs), (ins GR8:$src, memdst:$src2),
+                  "bit.b\t{$src2, $src}",
+                  [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
                    (implicit SRW)]>;
 def BIT16rm : I16rm<0x0,
-                    (outs), (ins GR16:$src1, memdst:$src2),
-                    "bit.w\t{$src2, $src1}",
-                    [(MSP430cmp (and_su GR16:$src1, (load addr:$src2)), 0),
+                    (outs), (ins GR16:$src, memdst:$src2),
+                    "bit.w\t{$src2, $src}",
+                    [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
                      (implicit SRW)]>;
 
 def BIT8mr : I8mr<0x0,
-                  (outs), (ins memsrc:$src1, GR8:$src2),
-                  "bit.b\t{$src2, $src1}",
-                  [(MSP430cmp (and_su (load addr:$src1), GR8:$src2), 0),
+                  (outs), (ins memsrc:$src, GR8:$src2),
+                  "bit.b\t{$src2, $src}",
+                  [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
                    (implicit SRW)]>;
 def BIT16mr : I16mr<0x0,
-                    (outs), (ins memsrc:$src1, GR16:$src2),
-                    "bit.w\t{$src2, $src1}",
-                    [(MSP430cmp (and_su (load addr:$src1), GR16:$src2), 0),
+                    (outs), (ins memsrc:$src, GR16:$src2),
+                    "bit.w\t{$src2, $src}",
+                    [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
                      (implicit SRW)]>;
 
 def BIT8mi : I8mi<0x0,
-                  (outs), (ins memsrc:$src1, i8imm:$src2),
-                  "bit.b\t{$src2, $src1}",
-                  [(MSP430cmp (and_su (load addr:$src1), (i8 imm:$src2)), 0),
+                  (outs), (ins memsrc:$src, i8imm:$src2),
+                  "bit.b\t{$src2, $src}",
+                  [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
                    (implicit SRW)]>;
 def BIT16mi : I16mi<0x0,
-                    (outs), (ins memsrc:$src1, i16imm:$src2),
-                    "bit.w\t{$src2, $src1}",
-                    [(MSP430cmp (and_su (load addr:$src1), (i16 imm:$src2)), 0),
+                    (outs), (ins memsrc:$src, i16imm:$src2),
+                    "bit.w\t{$src2, $src}",
+                    [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
                      (implicit SRW)]>;
 
 def BIT8mm : I8mm<0x0,
-                  (outs), (ins memsrc:$src1, memsrc:$src2),
-                  "bit.b\t{$src2, $src1}",
-                  [(MSP430cmp (and_su (i8 (load addr:$src1)),
+                  (outs), (ins memsrc:$src, memsrc:$src2),
+                  "bit.b\t{$src2, $src}",
+                  [(MSP430cmp (and_su (i8 (load addr:$src)),
                                       (load addr:$src2)),
                               0),
                    (implicit SRW)]>;
 def BIT16mm : I16mm<0x0,
-                    (outs), (ins memsrc:$src1, memsrc:$src2),
-                    "bit.w\t{$src2, $src1}",
-                    [(MSP430cmp (and_su (i16 (load addr:$src1)),
+                    (outs), (ins memsrc:$src, memsrc:$src2),
+                    "bit.w\t{$src2, $src}",
+                    [(MSP430cmp (and_su (i16 (load addr:$src)),
                                         (load addr:$src2)),
                                 0),
                      (implicit SRW)]>;
@@ -1134,12 +1139,12 @@ def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
 def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
 def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
 
-def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)),
-          (ADD16ri GR16:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)),
-          (ADD16ri GR16:$src1, texternalsym:$src2)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)),
-          (ADD16ri GR16:$src1, tblockaddress:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)),
+          (ADD16ri GR16:$src, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)),
+          (ADD16ri GR16:$src, texternalsym:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)),
+          (ADD16ri GR16:$src, tblockaddress:$src2)>;
 
 def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
           (MOV16mi addr:$dst, tglobaladdr:$src)>;
@@ -1155,45 +1160,45 @@ def : Pat<(MSP430call (i16 texternalsym:$dst)),
           (CALLi texternalsym:$dst)>;
 
 // add and sub always produce carry
-def : Pat<(addc GR16:$src1, GR16:$src2),
-          (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(addc GR16:$src1, (load addr:$src2)),
-          (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(addc GR16:$src1, imm:$src2),
-          (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(addc GR16:$src, GR16:$src2),
+          (ADD16rr GR16:$src, GR16:$src2)>;
+def : Pat<(addc GR16:$src, (load addr:$src2)),
+          (ADD16rm GR16:$src, addr:$src2)>;
+def : Pat<(addc GR16:$src, imm:$src2),
+          (ADD16ri GR16:$src, imm:$src2)>;
 def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
           (ADD16mr addr:$dst, GR16:$src)>;
 def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
           (ADD16mm addr:$dst, addr:$src)>;
 
-def : Pat<(addc GR8:$src1, GR8:$src2),
-          (ADD8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(addc GR8:$src1, (load addr:$src2)),
-          (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(addc GR8:$src1, imm:$src2),
-          (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(addc GR8:$src, GR8:$src2),
+          (ADD8rr GR8:$src, GR8:$src2)>;
+def : Pat<(addc GR8:$src, (load addr:$src2)),
+          (ADD8rm GR8:$src, addr:$src2)>;
+def : Pat<(addc GR8:$src, imm:$src2),
+          (ADD8ri GR8:$src, imm:$src2)>;
 def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
           (ADD8mr addr:$dst, GR8:$src)>;
 def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
          (ADD8mm addr:$dst, addr:$src)>;
 
-def : Pat<(subc GR16:$src1, GR16:$src2),
-          (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(subc GR16:$src1, (load addr:$src2)),
-          (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(subc GR16:$src1, imm:$src2),
-          (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(subc GR16:$src, GR16:$src2),
+          (SUB16rr GR16:$src, GR16:$src2)>;
+def : Pat<(subc GR16:$src, (load addr:$src2)),
+          (SUB16rm GR16:$src, addr:$src2)>;
+def : Pat<(subc GR16:$src, imm:$src2),
+          (SUB16ri GR16:$src, imm:$src2)>;
 def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
           (SUB16mr addr:$dst, GR16:$src)>;
 def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
           (SUB16mm addr:$dst, addr:$src)>;
 
-def : Pat<(subc GR8:$src1, GR8:$src2),
-          (SUB8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(subc GR8:$src1, (load addr:$src2)),
-          (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(subc GR8:$src1, imm:$src2),
-          (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(subc GR8:$src, GR8:$src2),
+          (SUB8rr GR8:$src, GR8:$src2)>;
+def : Pat<(subc GR8:$src, (load addr:$src2)),
+          (SUB8rm GR8:$src, addr:$src2)>;
+def : Pat<(subc GR8:$src, imm:$src2),
+          (SUB8ri GR8:$src, imm:$src2)>;
 def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
           (SUB8mr addr:$dst, GR8:$src)>;
 def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
@@ -1201,6 +1206,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
           (SUB8mm addr:$dst, addr:$src)>;
 
 // peephole patterns
 def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
-def : Pat<(MSP430cmp (trunc (and_su GR16:$src1, GR16:$src2)), 0),
-          (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit),
+def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0),
+          (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit),
                   (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
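An aside on the recurring isTwoAddress to Constraints change in the .td files above: "$src = $dst" states the same operand tie explicitly. A sketch (the helper name is hypothetical) of how that tie is visible from C++ once TableGen has emitted the instruction descriptions:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrDesc.h"
using namespace llvm;

// Hypothetical query: true when use operand 1 must be allocated to the
// same register as def operand 0, which is exactly what the
// Constraints = "$src = $dst" string encodes.
static bool hasTiedFirstOperand(const MachineInstr &MI) {
  return MI.getDesc().getOperandConstraint(1, TOI::TIED_TO) == 0;
}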
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 0cae267..608ca49 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -71,48 +71,6 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 }
 
-const TargetRegisterClass *const *
-MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  const Function* F = MF->getFunction();
-  static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    0
-  };
-  static const TargetRegisterClass * const CalleeSavedRegClassesFP[] = {
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, 0
-  };
-  static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = {
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    0
-  };
-  static const TargetRegisterClass * const CalleeSavedRegClassesIntrFP[] = {
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
-    &MSP430::GR16RegClass, 0
-  };
-
-  if (hasFP(*MF))
-    return (F->getCallingConv() == CallingConv::MSP430_INTR ?
-            CalleeSavedRegClassesIntrFP : CalleeSavedRegClassesFP);
-  else
-    return (F->getCallingConv() == CallingConv::MSP430_INTR ?
-            CalleeSavedRegClassesIntr : CalleeSavedRegClasses);
-}
-
 BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
 
@@ -270,8 +228,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
                                                                          const {
   // Create a frame entry for the FPW register that must be saved.
   if (hasFP(MF)) {
-    int ATTRIBUTE_UNUSED FrameIdx =
-      MF.getFrameInfo()->CreateFixedObject(2, -4, true, false);
+    int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+    (void)FrameIdx;
     assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
            "Slot for FPW register must be last in order to be found!");
   }
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index c8684df..6e58d31 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -36,9 +36,6 @@ public:
   /// Code Generation virtual methods...
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
-  const TargetRegisterClass* const*
-  getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
   BitVector getReservedRegs(const MachineFunction &MF) const;
   const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
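A short illustration of the assert-only-variable idiom the hunk above switches to (a sketch, with MFI assumed in scope): casting to void silences the unused-variable warning in NDEBUG builds, replacing the ATTRIBUTE_UNUSED annotation, while CreateFixedObject also loses its trailing bool in this patch series.

// Sketch, not from the patch:
int FrameIdx = MFI->CreateFixedObject(2, -4, /*Immutable=*/true);
(void)FrameIdx;  // only read inside the assert below
assert(FrameIdx == MFI->getObjectIndexBegin() &&
       "fixed slot must be allocated first");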
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 4ef017a..2037a91 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -180,7 +180,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
   ManglerPrefixTy PrefixTy = Mangler::Default;
   if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
     PrefixTy = Mangler::Private;
-  else if (GV->hasLinkerPrivateLinkage())
+  else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
     PrefixTy = Mangler::LinkerPrivate;
 
   // If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 4d7fe4c..8ae05b7 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -133,8 +133,9 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
   const MachineFrameInfo *MFI = MF->getFrameInfo();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg());
-    if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+    unsigned Reg = CSI[i].getReg();
+    unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+    if (Mips::CPURegsRegisterClass->contains(Reg))
       CPUBitmask |= (1 << RegNum);
     else
       FPUBitmask |= (1 << RegNum);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index e979c3f..b6ff2c3 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -284,6 +284,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   MachineFunction *F = BB->getParent();
   MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Next, add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(sinkMBB);
 
   // Emit the right instruction according to the type of the operands compared
   if (isFPCmp) {
@@ -296,20 +308,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
       .addReg(Mips::ZERO).addMBB(sinkMBB);
 
-  F->insert(It, copy0MBB);
-  F->insert(It, sinkMBB);
-  // Update machine-CFG edges by first adding all successors of the current
-  // block to the new block which will contain the Phi node for the select.
-  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
-      e = BB->succ_end(); i != e; ++i)
-    sinkMBB->addSuccessor(*i);
-  // Next, remove all successors of the current block, and add the true
-  // and fallthrough blocks as its successors.
-  while(!BB->succ_empty())
-    BB->removeSuccessor(BB->succ_begin());
-  BB->addSuccessor(copy0MBB);
-  BB->addSuccessor(sinkMBB);
-
   //  copy0MBB:
   //   %FalseValue = ...
   //   # fallthrough to sinkMBB
@@ -322,11 +320,12 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
   BB = sinkMBB;
-  BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg())
+  BuildMI(*BB, BB->begin(), dl,
+          TII->get(Mips::PHI), MI->getOperand(0).getReg())
     .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
     .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
 
-  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
 }
 }
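The same select-lowering restructuring recurs across the targets in this patch. As a sketch (names assumed, following the code above), the CFG diamond is now built by splitting the block in place rather than by hand-copying successor lists:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/STLExtras.h"  // llvm::next
using namespace llvm;

// Sketch: split BB after the pseudo MI.  splice() moves the trailing
// instructions instead of copying them, and
// transferSuccessorsAndUpdatePHIs() also rewrites PHIs in successors
// that still name BB, the step the removed hand-written loops missed.
static void splitForSelect(MachineBasicBlock *BB, MachineInstr *MI,
                           MachineBasicBlock *sinkMBB) {
  sinkMBB->splice(sinkMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
}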
@@ -490,21 +489,21 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
 
     // %gp_rel relocation
     if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
-      SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+      SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                               MipsII::MO_GPREL);
       SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
       SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
       return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
     }
     // %hi/%lo relocation
-    SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                             MipsII::MO_ABS_HILO);
     SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1);
     SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
     return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
   } else {
-    SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                             MipsII::MO_GOT);
     SDValue ResNode = DAG.getLoad(MVT::i32, dl,
                                   DAG.getEntryNode(), GA, NULL, 0,
@@ -768,6 +767,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool &isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                              const SmallVectorImpl<SDValue> &OutVals,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               DebugLoc dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const {
@@ -787,7 +787,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // the stack (even if less than 4 are used as arguments)
   if (Subtarget->isABI_O32()) {
     int VTsize = EVT(MVT::i32).getSizeInBits()/8;
-    MFI->CreateFixedObject(VTsize, (VTsize*3), true, false);
+    MFI->CreateFixedObject(VTsize, (VTsize*3), true);
     CCInfo.AnalyzeCallOperands(Outs,
                                isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
   } else
@@ -808,7 +808,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    SDValue Arg = Outs[i].Val;
+    SDValue Arg = OutVals[i];
     CCValAssign &VA = ArgLocs[i];
 
     // Promote the value if needed.
@@ -857,7 +857,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // if O32 ABI is used. For EABI the first address is zero.
     LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
     int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
-                                    LastArgStackLoc, true, false);
+                                    LastArgStackLoc, true);
 
     SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
 
@@ -889,7 +889,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // node so that legalize doesn't hack it.
   unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                         getPointerTy(), 0, OpFlag);
   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
@@ -929,7 +929,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // Create the frame index only once. SPOffset here can be anything
     // (this will be fixed on processFunctionBeforeFrameFinalized)
     if (MipsFI->getGPStackOffset() == -1) {
-      FI = MFI->CreateFixedObject(4, 0, true, false);
+      FI = MFI->CreateFixedObject(4, 0, true);
      MipsFI->setGPFI(FI);
     }
     MipsFI->setGPStackOffset(LastArgStackLoc);
@@ -1098,7 +1098,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       // offset on PEI::calculateFrameObjectOffsets.
       // Arguments are always 32-bit.
       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
-      int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+      int FI = MFI->CreateFixedObject(ArgSize, 0, true);
       MipsFI->recordLoadArgsFI(FI, -(ArgSize+
         (FirstStackArgLoc + VA.getLocMemOffset())));
 
@@ -1137,7 +1137,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
 
-      int FI = MFI->CreateFixedObject(4, 0, true, false);
+      int FI = MFI->CreateFixedObject(4, 0, true);
       MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
       OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
@@ -1169,6 +1169,7 @@ SDValue
 MipsTargetLowering::LowerReturn(SDValue Chain,
                                 CallingConv::ID CallConv, bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {
 
   // CCValAssign - represent the assignment of
@@ -1198,7 +1199,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
     assert(VA.isRegLoc() && "Can only return in registers!");
 
     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                             Outs[i].Val, Flag);
+                             OutVals[i], Flag);
 
     // guarantee that all emitted copies are
     // stuck together, avoiding something bad
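A sketch of the recurring Outs/OutVals split (the loop body is assumed, following the hunks above): Outs keeps the per-argument flags while the actual SDValues now travel in a parallel OutVals array, so lowering code indexes the two side by side.

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Hypothetical excerpt of an argument-lowering loop under the new API.
static void lowerArgs(const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals) {
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    SDValue Arg = OutVals[i];               // value: was Outs[i].Val
    ISD::ArgFlagsTy Flags = Outs[i].Flags;  // flags stay in Outs
    (void)Arg; (void)Flags;                 // ... target-specific lowering
  }
}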
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f2de489..460747b 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
                 CallingConv::ID CallConv, bool isVarArg,
                 bool &isTailCall,
                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
                 const SmallVectorImpl<ISD::InputArg> &Ins,
                 DebugLoc dl, SelectionDAG &DAG,
                 SmallVectorImpl<SDValue> &InVals) const;
@@ -128,6 +129,7 @@ namespace llvm {
       LowerReturn(SDValue Chain,
                   CallingConv::ID CallConv, bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
+                  const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
 
     virtual MachineBasicBlock *
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 4005e35..6c09a3e 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -127,61 +127,75 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
   BuildMI(MBB, MI, DL, get(Mips::NOP));
 }
 
-bool MipsInstrInfo::
-copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-             unsigned DestReg, unsigned SrcReg,
-             const TargetRegisterClass *DestRC,
-             const TargetRegisterClass *SrcRC,
-             DebugLoc DL) const {
+void MipsInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+            MachineBasicBlock::iterator I, DebugLoc DL,
+            unsigned DestReg, unsigned SrcReg,
+            bool KillSrc) const {
+  bool DestCPU = Mips::CPURegsRegClass.contains(DestReg);
+  bool SrcCPU  = Mips::CPURegsRegClass.contains(SrcReg);
+
+  // CPU-CPU is the most common.
+  if (DestCPU && SrcCPU) {
+    BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
 
-  if (DestRC != SrcRC) {
-
-    // Copy to/from FCR31 condition register
-    if ((DestRC == Mips::CPURegsRegisterClass) &&
-        (SrcRC == Mips::CCRRegisterClass))
-      BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg);
-    else if ((DestRC == Mips::CCRRegisterClass) &&
-             (SrcRC == Mips::CPURegsRegisterClass))
-      BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg);
-
-    // Moves between coprocessors and cpu
-    else if ((DestRC == Mips::CPURegsRegisterClass) &&
-             (SrcRC == Mips::FGR32RegisterClass))
-      BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg);
-    else if ((DestRC == Mips::FGR32RegisterClass) &&
-             (SrcRC == Mips::CPURegsRegisterClass))
-      BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
-
-    // Move from/to Hi/Lo registers
-    else if ((DestRC == Mips::HILORegisterClass) &&
-             (SrcRC == Mips::CPURegsRegisterClass)) {
-      unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO;
-      BuildMI(MBB, I, DL, get(Opc), DestReg);
-    } else if ((SrcRC == Mips::HILORegisterClass) &&
-               (DestRC == Mips::CPURegsRegisterClass)) {
-      unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
-      BuildMI(MBB, I, DL, get(Opc), DestReg);
-    } else
-      // Can't copy this register
-      return false;
+  // Copy to CPU from other registers.
+  if (DestCPU) {
+    if (Mips::CCRRegClass.contains(SrcReg))
+      BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    else if (Mips::FGR32RegClass.contains(SrcReg))
+      BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    else if (SrcReg == Mips::HI)
+      BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg);
+    else if (SrcReg == Mips::LO)
+      BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg);
+    else
+      llvm_unreachable("Copy to CPU from invalid register");
+    return;
+  }
 
-    return true;
+  // Copy to other registers from CPU.
+  if (SrcCPU) {
+    if (Mips::CCRRegClass.contains(DestReg))
+      BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    else if (Mips::FGR32RegClass.contains(DestReg))
+      BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    else if (DestReg == Mips::HI)
+      BuildMI(MBB, I, DL, get(Mips::MTHI))
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    else if (DestReg == Mips::LO)
+      BuildMI(MBB, I, DL, get(Mips::MTLO))
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    else
+      llvm_unreachable("Copy from CPU to invalid register");
+    return;
   }
 
-  if (DestRC == Mips::CPURegsRegisterClass)
-    BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
-      .addReg(SrcReg);
-  else if (DestRC == Mips::FGR32RegisterClass)
-    BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg);
-  else if (DestRC == Mips::AFGR64RegisterClass)
-    BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg);
-  else if (DestRC == Mips::CCRRegisterClass)
-    BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg);
-  else
-    // Can't copy this register
-    return false;
+  if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) {
+    BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
 
-  return true;
+  if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
+    BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
+  if (Mips::CCRRegClass.contains(DestReg, SrcReg)) {
+    BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  llvm_unreachable("Cannot copy registers");
 }
 
 void MipsInstrInfo::
@@ -247,80 +261,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     llvm_unreachable("Register class not handled!");
 }
 
-MachineInstr *MipsInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF,
-                      MachineInstr* MI,
-                      const SmallVectorImpl<unsigned> &Ops, int FI) const
-{
-  if (Ops.size() != 1) return NULL;
-
-  MachineInstr *NewMI = NULL;
-
-  switch (MI->getOpcode()) {
-  case Mips::ADDu:
-    if ((MI->getOperand(0).isReg()) &&
-        (MI->getOperand(1).isReg()) &&
-        (MI->getOperand(1).getReg() == Mips::ZERO) &&
-        (MI->getOperand(2).isReg())) {
-      if (Ops[0] == 0) {    // COPY -> STORE
-        unsigned SrcReg = MI->getOperand(2).getReg();
-        bool isKill = MI->getOperand(2).isKill();
-        bool isUndef = MI->getOperand(2).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW))
-          .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
-          .addImm(0).addFrameIndex(FI);
-      } else {              // COPY -> LOAD
-        unsigned DstReg = MI->getOperand(0).getReg();
-        bool isDead = MI->getOperand(0).isDead();
-        bool isUndef = MI->getOperand(0).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW))
-          .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
-                  getUndefRegState(isUndef))
-          .addImm(0).addFrameIndex(FI);
-      }
-    }
-    break;
-  case Mips::FMOV_S32:
-  case Mips::FMOV_D32:
-    if ((MI->getOperand(0).isReg()) &&
-        (MI->getOperand(1).isReg())) {
-      const TargetRegisterClass
-        *RC = RI.getRegClass(MI->getOperand(0).getReg());
-      unsigned StoreOpc, LoadOpc;
-      bool IsMips1 = TM.getSubtarget<MipsSubtarget>().isMips1();
-
-      if (RC == Mips::FGR32RegisterClass) {
-        LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1;
-      } else {
-        assert(RC == Mips::AFGR64RegisterClass);
-        // Mips1 doesn't have ldc/sdc instructions.
-        if (IsMips1) break;
-        LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1;
-      }
-
-      if (Ops[0] == 0) {    // COPY -> STORE
-        unsigned SrcReg = MI->getOperand(1).getReg();
-        bool isKill = MI->getOperand(1).isKill();
-        bool isUndef = MI->getOperand(2).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc))
-          .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
-          .addImm(0).addFrameIndex(FI) ;
-      } else {              // COPY -> LOAD
-        unsigned DstReg = MI->getOperand(0).getReg();
-        bool isDead = MI->getOperand(0).isDead();
-        bool isUndef = MI->getOperand(0).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc))
-          .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
-                  getUndefRegState(isUndef))
-          .addImm(0).addFrameIndex(FI);
-      }
-    }
-    break;
-  }
-
-  return NewMI;
-}
-
 //===----------------------------------------------------------------------===//
 // Branch Analysis
 //===----------------------------------------------------------------------===//
@@ -520,9 +460,8 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 unsigned MipsInstrInfo::
 InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
              MachineBasicBlock *FBB,
-             const SmallVectorImpl<MachineOperand> &Cond) const {
-  // FIXME this should probably have a DebugLoc argument
-  DebugLoc dl;
+             const SmallVectorImpl<MachineOperand> &Cond,
+             DebugLoc DL) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) &&
@@ -531,18 +470,18 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
 
   if (FBB == 0) { // One way branch.
     if (Cond.empty()) {
       // Unconditional branch?
-      BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB);
+      BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
     } else {
       // Conditional branch.
       unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
       const TargetInstrDesc &TID = get(Opc);
 
       if (TID.getNumOperands() == 3)
-        BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+        BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
                           .addReg(Cond[2].getReg())
                           .addMBB(TBB);
       else
-        BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+        BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
                           .addMBB(TBB);
 
@@ -554,12 +493,12 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   const TargetInstrDesc &TID = get(Opc);
 
   if (TID.getNumOperands() == 3)
-    BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
+    BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
       .addMBB(TBB);
   else
-    BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB);
+    BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB);
 
-  BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB);
+  BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
 
   return 2;
 }
@@ -621,12 +560,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
     const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 
     GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
-    bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP,
-                                Mips::CPURegsRegisterClass,
-                                Mips::CPURegsRegisterClass,
-                                DebugLoc());
-    assert(Ok && "Couldn't assign to global base register!");
-    Ok = Ok; // Silence warning when assertions are turned off.
+    BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+            GlobalBaseReg).addReg(Mips::GP);
     RegInfo.addLiveIn(Mips::GP);
 
     MipsFI->setGlobalBaseReg(GlobalBaseReg);
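A sketch of the idiom the getGlobalBaseReg change adopts (illustrative only; the helper name and setup are assumptions): before register allocation, the generic COPY pseudo can stand in for a target copy instruction and is expanded via copyPhysReg later, so no target hook or result check is needed at build time.

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Hypothetical helper: copy a physical register into a fresh virtual
// register using the generic COPY opcode.
static unsigned copyIntoVReg(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator IP,
                             const TargetInstrInfo &TII,
                             MachineRegisterInfo &MRI,
                             const TargetRegisterClass *RC, unsigned PhysReg) {
  unsigned VReg = MRI.createVirtualRegister(RC);
  BuildMI(MBB, IP, DebugLoc(), TII.get(TargetOpcode::COPY), VReg)
    .addReg(PhysReg);
  return VReg;
}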
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 7919d9a..d6f87f9 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -204,13 +204,12 @@ public:
   virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
-                                const SmallVectorImpl<MachineOperand> &Cond) const;
-  virtual bool copyRegToReg(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator I,
-                            unsigned DestReg, unsigned SrcReg,
-                            const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC,
-                            DebugLoc DL) const;
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const;
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const;
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
@@ -223,18 +222,6 @@ public:
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
 
-  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                              MachineInstr* MI,
-                                              const SmallVectorImpl<unsigned> &Ops,
-                                              int FrameIndex) const;
-
-  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                              MachineInstr* MI,
-                                              const SmallVectorImpl<unsigned> &Ops,
-                                              MachineInstr* LoadMI) const {
-    return 0;
-  }
-
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 2b9e941..5337c9f 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -541,7 +541,7 @@ let Predicates = [HasSwap] in {
 def MIPS_CMOV_ZERO  : PatLeaf<(i32 0)>;
 def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
 
-let Predicates = [HasCondMov], isTwoAddress = 1 in {
+let Predicates = [HasCondMov], Constraints = "$F = $dst" in {
   def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>;
   def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>;
 }
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 5e719af..e15f0a5 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -116,34 +116,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const
     return BitMode32CalleeSavedRegs;
 }
 
-/// Mips Callee Saved Register Classes
-const TargetRegisterClass* const*
-MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
-{
-  static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = {
-    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
-    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
-    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
-    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
-    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
-    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
-    &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
-  };
-
-  static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = {
-    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
-    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
-    &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
-    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
-    &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
-  };
-
-  if (Subtarget.isSingleFloat())
-    return SingleFloatOnlyCalleeSavedRC;
-  else
-    return BitMode32CalleeSavedRC;
-}
-
 BitVector MipsRegisterInfo::
 getReservedRegs(const MachineFunction &MF) const
 {
@@ -279,7 +251,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
   StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
 
   for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
-    if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
+    unsigned Reg = CSI[i].getReg();
+    if (!Mips::CPURegsRegisterClass->contains(Reg))
       break;
     MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
     TopCPUSavedRegOff = StackOffset;
@@ -311,7 +284,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
   // Adjust FPU Callee Saved Registers Area. This Area must be
   // aligned to the default Stack Alignment requirements.
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+    unsigned Reg = CSI[i].getReg();
+    if (Mips::CPURegsRegisterClass->contains(Reg))
       continue;
     MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
     TopFPUSavedRegOff = StackOffset;
@@ -528,4 +502,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const {
 }
 
 #include "MipsGenRegisterInfo.inc"
-
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index bc857b8..b500a65 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,9 +42,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
   /// Code Generation virtual methods...
   const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
 
-  const TargetRegisterClass* const*
-  getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
-
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
   bool hasFP(const MachineFunction &MF) const;
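A sketch of the replacement pattern used throughout the register-info files above (the loop and helper are assumed): with the parallel getCalleeSavedRegClasses tables and CalleeSavedInfo::getRegClass gone, class membership of a saved register is answered from the register number itself.

#include "llvm/CodeGen/MachineFrameInfo.h"   // CalleeSavedInfo
#include "llvm/Target/TargetRegisterInfo.h"
#include <vector>
using namespace llvm;

// Hypothetical scan over callee-saved info.  'IntRC' stands in for a
// target's integer register class (e.g. Mips::CPURegsRegClass above).
static unsigned countIntSaves(const std::vector<CalleeSavedInfo> &CSI,
                              const TargetRegisterClass &IntRC) {
  unsigned N = 0;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
    if (IntRC.contains(CSI[i].getReg()))  // class derived from the reg
      ++N;
  return N;
}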
+1186,7 @@ LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); for (unsigned i=0, Offset = 0; i<NumOps; i++) { // Get the argument - Arg = Outs[i].Val; + Arg = OutVals[i]; StoreOffset = (Offset + AddressOffset); // Store the argument on frame @@ -1282,6 +1285,7 @@ SDValue PIC16TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // Number of values to return @@ -1298,7 +1302,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; for(unsigned i=0;i<NumRet; ++i) { - RetVal = Outs[i].Val; + RetVal = OutVals[i]; Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal, ES, BS, DAG.getConstant (i, MVT::i8)); @@ -1374,6 +1378,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -1428,7 +1433,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Considering the GlobalAddressNode case here. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, MVT::i8); + Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8); Name = G->getGlobal()->getName(); } else {// Considering the ExternalSymbol case here ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee); @@ -1461,12 +1466,13 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue CallArgs; if (IsDirectCall) { CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag, - Outs, dl, DAG); + Outs, OutVals, dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } else { CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo, - DataAddr_Hi, Outs, Ins, dl, DAG); + DataAddr_Hi, Outs, OutVals, Ins, + dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } @@ -1791,14 +1797,14 @@ static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) { static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { if (isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->getZExtValue() == 0 && + cast<ConstantSDNode>(RHS)->isNullValue() && CC == ISD::SETNE && (LHS.getOpcode() == PIC16ISD::SELECT_ICC && LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) && isa<ConstantSDNode>(LHS.getOperand(0)) && isa<ConstantSDNode>(LHS.getOperand(1)) && - cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 && - cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) { + cast<ConstantSDNode>(LHS.getOperand(0))->isOne() && + cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -1928,15 +1934,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. 
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -1953,11 +1956,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII.get(PIC16::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index eea17f8..0a7506c 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -106,12 +106,14 @@ namespace llvm { SDValue LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG) const; @@ -143,6 +145,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -151,6 +154,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 793dd9f..e784f74 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -151,25 +151,20 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Can't load this register from stack slot"); } -bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - if (DestRC == PIC16::FSR16RegisterClass) { - BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg); - return true; - } - - if (DestRC == PIC16::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg); - return true; - } +void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + 
unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (PIC16::FSR16RegClass.contains(DestReg, SrcReg)) + Opc = PIC16::copy_fsr; + else if (PIC16::GPRRegClass.contains(DestReg, SrcReg)) + Opc = PIC16::copy_w; + else + llvm_unreachable("Impossible reg-to-reg copy"); - // Not yet supported. - return false; + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, @@ -196,15 +191,15 @@ bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, unsigned PIC16InstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); if (FBB == 0) { // One way branch. if (Cond.empty()) { // Unconditional branch? - DebugLoc dl; - BuildMI(&MBB, dl, get(PIC16::br_uncond)).addMBB(TBB); + BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB); } return 1; } diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h index 40a4cb4..a3a77f1 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.h +++ b/lib/Target/PIC16/PIC16InstrInfo.h @@ -57,12 +57,10 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; @@ -70,7 +68,8 @@ public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td index 24df251..86d36cb 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.td +++ b/lib/Target/PIC16/PIC16InstrInfo.td @@ -134,7 +134,7 @@ include "PIC16InstrFormats.td" //===----------------------------------------------------------------------===// // W = W Op F : Load the value from F and do Op to W. -let isTwoAddress = 1, mayLoad = 1 in +let Constraints = "$src = $dst", mayLoad = 1 in class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: ByteFormat<OpCode, (outs GPR:$dst), (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), @@ -146,7 +146,7 @@ class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: // F = F Op W : Load the value from F, do op with W and store in F. // This insn class is not marked as TwoAddress because the reg is // being used as a source operand only. (Remember a TwoAddress insn -// needs a copyRegToReg.) +// needs a copy.) let mayStore = 1 in class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>: ByteFormat<OpCode, (outs), @@ -160,7 +160,7 @@ class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>: )]>; // W = W Op L : Do Op of L with W and place result in W. 
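[Editorial note] A pattern worth calling out here, since it repeats for Mips above and for PowerPC further down: the fallible copyRegToReg hook is replaced by copyPhysReg, which takes an explicit kill flag and is not allowed to fail. A minimal sketch of the new hook's shape, using an invented target named XYZ (the opcode and register-class names are illustrative, not from this patch):

    void XYZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
      // Select the copy opcode from the classes of the physical registers;
      // contains(DestReg, SrcReg) checks both registers in one call.
      unsigned Opc;
      if (XYZ::GPRRegClass.contains(DestReg, SrcReg))
        Opc = XYZ::MOVrr;
      else if (XYZ::FPRRegClass.contains(DestReg, SrcReg))
        Opc = XYZ::FMOVrr;
      else
        llvm_unreachable("Impossible reg-to-reg copy");

      // No failure path: an unsupported copy is a backend bug. The caller's
      // kill state is forwarded onto the new instruction.
      BuildMI(MBB, I, DL, get(Opc), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }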
-let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class BinOpWL<bits<6> opcode, string OpcStr, SDNode OpNode> : LiteralFormat<opcode, (outs GPR:$dst), (ins GPR:$src, i8imm:$literal), @@ -220,7 +220,7 @@ def set_fsrlo: "movwf ${fsr}L", []>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def set_fsrhi: ByteFormat<0, (outs FSR16:$dst), (ins FSR16:$src, GPR:$val), @@ -234,8 +234,8 @@ def set_pclath: [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>; //---------------------------- -// copyRegToReg -// copyRegToReg insns. These are dummy. They should always be deleted +// copyPhysReg +// copyPhysReg insns. These are dummy. They should always be deleted // by the optimizer and never be present in the final generated code. // if they are, then we have to write correct macros for these insns. //---------------------------- @@ -362,7 +362,7 @@ def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry. } // W -= [F] ; load from F and sub the value from W. -let isTwoAddress = 1, mayLoad = 1 in +let Constraints = "$src = $dst", mayLoad = 1 in class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: ByteFormat<OpCode, (outs GPR:$dst), (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), @@ -418,7 +418,7 @@ def orlw : BinOpWL<0, "iorlw", or>; // sublw // W = C - W ; sub W from literal. (Without borrow). -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> : LiteralFormat<opcode, (outs GPR:$dst), (ins GPR:$src, i8imm:$literal), @@ -426,7 +426,7 @@ class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> : [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>; // subwl // W = W - C ; sub literal from W (Without borrow). -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class SUBWL<bits<6> opcode, string OpcStr, SDNode OpNode> : LiteralFormat<opcode, (outs GPR:$dst), (ins GPR:$src, i8imm:$literal), diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index ab81ed1..241170b 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -117,7 +117,7 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { DebugLoc dl = I->getDebugLoc(); BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); Changed = true; - PageChanged = 0; + PageChanged = 0; } } } diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index c282521..27f1cf5 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -150,8 +150,8 @@ void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) { // For PIC16, automatic variables of a function are emitted as globals. -// Clone the auto variables of a function and put them in ValueMap, -// this ValueMap will be used while +// Clone the auto variables of a function and put them in VMap, +// this VMap will be used while // Cloning the code of function itself. // void PIC16Cloner::CloneAutos(Function *F) { @@ -160,11 +160,11 @@ void PIC16Cloner::CloneAutos(Function *F) { Module *M = F->getParent(); Module::GlobalListType &Globals = M->getGlobalList(); - // Clear the leftovers in ValueMap by any previous cloning. - ValueMap.clear(); + // Clear the leftovers in VMap by any previous cloning. + VMap.clear(); // Find the auto globals for this function and clone them, and put them - // in ValueMap. + // in VMap.
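[Editorial note] The isTwoAddress flag retired in the .td hunks above was a bare per-instruction marker; the Constraints string instead ties a named source operand to a named result operand, which is what the two-address handling actually needs. On the C++ side the tie is visible through the operand-constraint table TableGen emits; a hedged sketch, assuming operand 1 is the tied source as in the patterns above:

    #include "llvm/Target/TargetInstrDesc.h"
    using namespace llvm;

    // True if operand 1 ($src) is tied to def operand 0 ($dst), i.e. what
    // Constraints = "$src = $dst" expands to in the generated tables.
    static bool hasTiedSource(const TargetInstrDesc &TID) {
      return TID.getOperandConstraint(1, TOI::TIED_TO) == 0;
    }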
std::string FnName = F->getName().str(); std::string VarName, ClonedVarName; for (Module::global_iterator I = M->global_begin(), E = M->global_end(); @@ -182,8 +182,8 @@ void PIC16Cloner::CloneAutos(Function *F) { // Add these new globals to module's globals list. Globals.push_back(ClonedGV); - // Update ValueMap. - ValueMap[GV] = ClonedGV; + // Update VMap. + VMap[GV] = ClonedGV; } } } @@ -236,10 +236,10 @@ void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) { } // Clone the given function and return it. -// Note: it uses the ValueMap member of the class, which is already populated +// Note: it uses the VMap member of the class, which is already populated // by cloneAutos by the time we reach here. -// FIXME: Should we just pass ValueMap's ref as a parameter here? rather -// than keeping the ValueMap as a member. +// FIXME: Should we just pass VMap's ref as a parameter here? rather +// than keeping the VMap as a member. Function * PIC16Cloner::cloneFunction(Function *OrgF) { Function *ClonedF; @@ -252,11 +252,11 @@ PIC16Cloner::cloneFunction(Function *OrgF) { } // Clone does not exist. - // First clone the autos, and populate ValueMap. + // First clone the autos, and populate VMap. CloneAutos(OrgF); // Now create the clone. - ClonedF = CloneFunction(OrgF, ValueMap); + ClonedF = CloneFunction(OrgF, VMap); // The new function should be for interrupt line. Therefore should have // the name suffixed with IL and section attribute marked with IL. diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h index 24c1152..e8b5aa4 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h @@ -15,7 +15,7 @@ #ifndef PIC16CLONER_H #define PIC16CLONER_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ValueMap.h" using namespace llvm; using std::vector; @@ -72,7 +72,7 @@ namespace llvm { // the corresponding cloned auto variable of the cloned function. // This value map is passed during the function cloning so that all the // uses of auto variables be updated properly. - DenseMap<const Value*, Value*> ValueMap; + ValueMap<const Value*, Value*> VMap; // Map of already cloned functions.
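[Editorial note] The DenseMap-to-ValueMap switch in PIC16Cloner.h above is more than a rename: ValueMap installs callbacks on its key Values, so an entry whose key is later deleted or RAUW'd is dropped or retargeted instead of dangling. A small sketch of the usage pattern (the helper name is invented here):

    #include "llvm/ADT/ValueMap.h"
    #include "llvm/GlobalVariable.h"
    using namespace llvm;

    // Record a cloned auto-global under its original, as CloneAutos does.
    // If Orig is later RAUW'd or erased, the ValueMap entry stays coherent;
    // a plain DenseMap key would silently go stale.
    static void rememberClone(ValueMap<const Value*, Value*> &VMap,
                              const GlobalVariable *Orig,
                              GlobalVariable *Clone) {
      VMap[Orig] = Clone;
    }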
map<Function *, Function *> ClonedFunctionMap; diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp index 30a1d4a..dff98d1 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp @@ -35,13 +35,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -// PIC16 Callee Saved Reg Classes -const TargetRegisterClass* const* -PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; - return CalleeSavedRegClasses; -} - BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); return Reserved; diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h index 6a9a038..5536a61 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ b/lib/Target/PIC16/PIC16RegisterInfo.h @@ -41,10 +41,6 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo { virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF = 0) const; - // PIC16 callee saved register classes - virtual const TargetRegisterClass* const * - getCalleeSavedRegClasses(const MachineFunction *MF) const; - virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 66dfd4b..db11fde 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -78,7 +78,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, isLoad = TID.mayLoad(); isStore = TID.mayStore(); - unsigned TSFlags = TID.TSFlags; + uint64_t TSFlags = TID.TSFlags; isFirst = TSFlags & PPCII::PPC970_First; isSingle = TSFlags & PPCII::PPC970_Single; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 10b516a..d47d989 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1203,11 +1203,11 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); - SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here DebugLoc dl = GSDN->getDebugLoc(); + const GlobalValue *GV = GSDN->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset()); + SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); @@ -1631,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - isImmutable, false); + isImmutable); // Create load nodes to retrieve arguments from the stack. 
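[Editorial note] From here on, many hunks shrink calls to MachineFrameInfo::CreateFixedObject by one argument: the trailing bool is gone, leaving size, offset, and immutability. A hedged before/after sketch (the dropped flag's meaning is not shown anywhere in this diff, so it is only echoed in the comment):

    #include "llvm/CodeGen/MachineFrameInfo.h"
    using namespace llvm;

    static int allocateFixedSlot(MachineFrameInfo *MFI, uint64_t Size,
                                 int64_t Offset, bool Immutable) {
      // Before: MFI->CreateFixedObject(Size, Offset, Immutable, false);
      // After the signature change, only three arguments remain:
      return MFI->CreateFixedObject(Size, Offset, Immutable);
    }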
SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -1700,8 +1700,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( FuncInfo->setVarArgsStackOffset( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset(), - true, false)); + CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -1911,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. - int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false); + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { @@ -1936,7 +1935,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, @@ -2062,7 +2061,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (needsLoad) { int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset + (ArgSize - ObjSize), - isImmutable, false); + isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); @@ -2097,7 +2096,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth, true, false)); + Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs @@ -2137,6 +2136,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, unsigned CC, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, unsigned &nAltivecParamsAtEnd) { // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is @@ -2153,9 +2153,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // 16-byte aligned. nAltivecParamsAtEnd = 0; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; - EVT ArgVT = Arg.getValueType(); + EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { @@ -2314,8 +2314,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, - NewRetAddrLoc, - true, false); + NewRetAddrLoc, true); EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, @@ -2328,7 +2327,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, - true, false); + true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, PseudoSourceValue::getFixedStack(NewFPIdx), 0, @@ -2346,7 +2345,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false); + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; @@ -2472,7 +2471,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType()); else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) @@ -2705,6 +2705,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -2714,11 +2715,11 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, Ins, + isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); } else { return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, Ins, + isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -2728,6 +2729,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -2737,7 +2739,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && "Unknown calling convention!"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); unsigned PtrByteSize = 4; MachineFunction &MF = DAG.getMachineFunction(); @@ -2769,7 +2770,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Outs[i].Val.getValueType(); + EVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; @@ -2838,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, 
i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isByVal()) { @@ -2934,6 +2935,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -2961,7 +2963,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, - Outs, + Outs, OutVals, nAltivecParamsAtEnd); // Calculate by how many bytes the stack has to be adjusted in case of tail @@ -3025,7 +3027,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SmallVector<SDValue, 8> MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // PtrOff will be used to store the current argument to the stack if a @@ -3051,7 +3053,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Everything else is passed left-justified. EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg, NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3228,8 +3230,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, ArgOffset = ((ArgOffset+15)/16)*16; ArgOffset += 12*16; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; - EVT ArgType = Arg.getValueType(); + SDValue Arg = OutVals[i]; + EVT ArgType = Outs[i].VT; if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { if (++j > NumVRs) { @@ -3297,6 +3299,7 @@ SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; @@ -3318,7 +3321,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); Flag = Chain.getValue(1); } @@ -3376,8 +3379,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // Find out what the fix offset of the frame pointer save area. int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, - true, false); + RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); // Save the result. FI->setReturnAddrSaveIndex(RASI); } @@ -3403,8 +3405,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, - true, false); + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. 
FI->setFramePointerSaveIndex(FPSI); } @@ -4518,7 +4519,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : @@ -4583,7 +4587,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = @@ -4716,23 +4723,22 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); - BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -4745,7 +4751,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII->get(PPC::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); } @@ -4831,7 +4838,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... 
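[Editorial note] Every custom inserter touched in this patch converges on the same CFG surgery: instead of copying and then clearing the successor list by hand, the tail of the current block is spliced into the continuation block and a helper rewrites the PHIs. In outline (a sketch of the shared idiom, not a complete inserter):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/ADT/STLExtras.h"
    using namespace llvm;

    static void splitAroundPseudo(MachineInstr *MI, MachineBasicBlock *BB,
                                  MachineBasicBlock *sinkMBB) {
      // Move everything after the pseudo instruction into sinkMBB...
      sinkMBB->splice(sinkMBB->begin(), BB,
                      llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
      // ...then hand over BB's successor edges, fixing up PHIs in one step.
      sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
      // The new edges out of BB (e.g. to copy0MBB and sinkMBB) are added by
      // the caller afterwards.
    }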
@@ -4899,7 +4909,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = @@ -5025,7 +5038,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, llvm_unreachable("Unexpected instr type to insert"); } - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -5042,19 +5055,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, default: break; case PPCISD::SHL: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - if (C->getZExtValue() == 0) // 0 << V -> 0. + if (C->isNullValue()) // 0 << V -> 0. return N->getOperand(0); } break; case PPCISD::SRL: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - if (C->getZExtValue() == 0) // 0 >>u V -> 0. + if (C->isNullValue()) // 0 >>u V -> 0. return N->getOperand(0); } break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - if (C->getZExtValue() == 0 || // 0 >>s V -> 0. + if (C->isNullValue() || // 0 >>s V -> 0. C->isAllOnesValue()) // -1 >>s V -> -1. return N->getOperand(0); } @@ -5380,11 +5393,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true -/// it means one of the asm constraint of the inline asm instruction being -/// processed is 'm'. +/// vector. If it is invalid, don't add anything to Ops. void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, - bool hasMemory, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { SDValue Result(0,0); @@ -5443,7 +5453,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, } // Handle standard constraint letters. - TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG); + TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG); } // isLegalAddressingMode - Return true if the addressing mode represented diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 6dcaf1e..700816f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -318,12 +318,9 @@ namespace llvm { unsigned getByValTypeAlignment(const Type *Ty) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops - /// vector. If it is invalid, don't add anything to Ops. If hasMemory is - /// true it means one of the asm constraint of the inline asm instruction - /// being processed is 'm'. + /// vector. If it is invalid, don't add anything to Ops. 
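[Editorial note] The getZExtValue() == 0 comparisons replaced in PerformDAGCombine above are not merely restyled: APInt's getZExtValue() asserts once the constant's value no longer fits in 64 bits, while isNullValue(), isOne(), and isAllOnesValue() are width-independent. Sketch:

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    static bool isZeroShiftOperand(const ConstantSDNode *C) {
      return C->isNullValue();           // fine for i128 and wider constants
      // return C->getZExtValue() == 0;  // can assert for wide nonzero values
    }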
virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -438,6 +435,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -446,6 +444,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue @@ -465,6 +464,7 @@ namespace llvm { LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -472,6 +472,7 @@ namespace llvm { LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1b7a778..1574aa3 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -316,9 +316,8 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -327,50 +326,46 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // One-way branch. if (FBB == 0) { if (Cond.empty()) // Unconditional branch - BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); else // Conditional branch - BuildMI(&MBB, dl, get(PPC::BCC)) + BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); return 1; } // Two-way Conditional Branch. - BuildMI(&MBB, dl, get(PPC::BCC)) + BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB); + BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } -bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC != SrcRC) { - // Not yet supported! 
- return false; - } - - if (DestRC == PPC::GPRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::G8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::F4RCRegisterClass || - DestRC == PPC::F8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::CRRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::VRRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::CRBITRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else { - // Attempt to copy register that is not GPR or FPR - return false; - } - - return true; +void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR; + else if (PPC::G8RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR8; + else if (PPC::F4RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::FMR; + else if (PPC::CRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::MCRF; + else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::VOR; + else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::CROR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + const TargetInstrDesc &TID = get(Opc); + if (TID.getNumOperands() == 3) + BuildMI(MBB, I, DL, TID, DestReg) + .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); + else + BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } bool @@ -654,121 +649,6 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, return &*MIB; } -/// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into -/// copy instructions, turning them into load/store instructions. -MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { - if (Ops.size() != 1) return NULL; - - // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because - // it takes more than one instruction to store it. 
- unsigned Opc = MI->getOpcode(); - unsigned OpNum = Ops[0]; - - MachineInstr *NewMI = NULL; - if ((Opc == PPC::OR && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if ((Opc == PPC::OR8 && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) { - // The register may be F4RC or F8RC, and that determines the memory op. - unsigned OrigReg = MI->getOperand(OpNum).getReg(); - // We cannot tell the register class from a physreg alone. - if (TargetRegisterInfo::isPhysicalRegister(OrigReg)) - return NULL; - const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg); - const bool is64 = RC == PPC::F8RCRegisterClass; - - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), - get(is64 ? PPC::STFD : PPC::STFS)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), - get(is64 ? PPC::LFD : PPC::LFS)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } - - return NewMI; -} - -bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because - // it takes more than one instruction to store it. 
- unsigned Opc = MI->getOpcode(); - - if ((Opc == PPC::OR && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) - return true; - else if ((Opc == PPC::OR8 && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) - return true; - else if (Opc == PPC::FMR || Opc == PPC::FMRSD) - return true; - - return false; -} - - bool PPCInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 7a9e11b..eadb21e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -109,13 +109,12 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -135,23 +134,6 @@ public: const MDNode *MDPtr, DebugLoc DL) const; - /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into - /// copy instructions, turning them into load/store instructions. - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 0ff852c..4d6132a9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -269,140 +269,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; } -const TargetRegisterClass* const* -PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - // 32-bit Darwin calling convention. 
- static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - &PPC::GPRCRegClass, 0 - }; - - // 32-bit SVR4 calling convention. - static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRSAVERCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - 0 - }; - - // 64-bit Darwin calling convention. 
- static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = { - &PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - &PPC::G8RCRegClass, 0 - }; - - // 64-bit SVR4 calling convention. - static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = { - &PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRSAVERCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - 0 - }; - - if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : - Darwin32_CalleeSavedRegClasses; - - return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses - : SVR4_CalleeSavedRegClasses; -} - // needsFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. @@ -1060,8 +926,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. 
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, - true, false); + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. FI->setFramePointerSaveIndex(FPSI); } @@ -1069,8 +934,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { - MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, - true, false); + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } // Reserve a slot closest to SP or frame pointer if we have a dynalloc or @@ -1127,9 +991,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = CSI[i].getRegClass(); - - if (RC == PPC::GPRCRegisterClass) { + if (PPC::GPRCRegisterClass->contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -1137,7 +999,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinGPR) { MinGPR = Reg; } - } else if (RC == PPC::G8RCRegisterClass) { + } else if (PPC::G8RCRegisterClass->contains(Reg)) { HasG8SaveArea = true; G8Regs.push_back(CSI[i]); @@ -1145,7 +1007,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinG8R) { MinG8R = Reg; } - } else if (RC == PPC::F8RCRegisterClass) { + } else if (PPC::F8RCRegisterClass->contains(Reg)) { HasFPSaveArea = true; FPRegs.push_back(CSI[i]); @@ -1154,12 +1016,12 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) MinFPR = Reg; } // FIXME SVR4: Disable CR save area for now. - } else if ( RC == PPC::CRBITRCRegisterClass - || RC == PPC::CRRCRegisterClass) { + } else if (PPC::CRBITRCRegisterClass->contains(Reg) + || PPC::CRRCRegisterClass->contains(Reg)) { // HasCRSaveArea = true; - } else if (RC == PPC::VRSAVERCRegisterClass) { + } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) { HasVRSAVESaveArea = true; - } else if (RC == PPC::VRRCRegisterClass) { + } else if (PPC::VRRCRegisterClass->contains(Reg)) { HasVRSaveArea = true; VRegs.push_back(CSI[i]); @@ -1240,9 +1102,10 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // which have the CR/CRBIT register class? // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - const TargetRegisterClass *RC = CSI[i].getRegClass(); + unsigned Reg = CSI[i].getReg(); - if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) { + if (PPC::CRBITRCRegisterClass->contains(Reg) || + PPC::CRRCRegisterClass->contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1257,9 +1120,9 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // which have the VRSAVE register class? // Adjust the frame index of the VRSAVE spill slot. 
for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - const TargetRegisterClass *RC = CSI[i].getRegClass(); + unsigned Reg = CSI[i].getReg(); - if (RC == PPC::VRSAVERCRegisterClass) { + if (PPC::VRSAVERCRegisterClass->contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1762,4 +1625,3 @@ int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { } #include "PPCGenRegisterInfo.inc" - diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 43cf535..f026847 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -42,9 +42,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; /// targetHandlesStackFrameRounding - Returns true if the target is diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 7fa73ed..4d7ee08 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -300,6 +300,14 @@ unsigned long reverse(unsigned v) { return v ^ (t >> 8); } +Neither is this (very standard idiom): + +int f(int n) +{ + return (((n) << 24) | (((n) & 0xff00) << 8) + | (((n) >> 8) & 0xff00) | ((n) >> 24)); +} + //===---------------------------------------------------------------------===// [LOOP RECOGNITION] @@ -898,17 +906,6 @@ The expression should optimize to something like //===---------------------------------------------------------------------===// -From GCC Bug 3756: -int -pn (int n) -{ - return (n >= 0 ? 1 : -1); -} -Should combine to (n >> 31) | 1. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts | llc". - -//===---------------------------------------------------------------------===// - void a(int variable) { if (variable == 4 || variable == 6) @@ -1439,33 +1436,6 @@ This pattern repeats several times, basically doing: //===---------------------------------------------------------------------===// -186.crafty contains this interesting pattern: - -%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0), - i8* %30) -%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0) -br i1 %phitmp648, label %bb70, label %bb76 - -bb70: ; preds = %OptionMatch.exit91, %bb69 - %78 = call i32 @strlen(i8* %30) nounwind readonly align 1 ; <i32> [#uses=1] - -This is basically: - cststr = "abcdef"; - if (strstr(cststr, P) == cststr) { - x = strlen(P); - ... - -The strstr call would be significantly cheaper written as: - -cststr = "abcdef"; -if (memcmp(P, str, strlen(P))) - x = strlen(P); - -This is memcmp+strlen instead of strstr. This also makes the strlen fully -redundant. - -//===---------------------------------------------------------------------===// - 186.crafty also contains this code: %1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0)) @@ -1863,3 +1833,91 @@ LLVM prefers comparisons with zero over non-zero in general, but in this case it chooses instead to keep the max operation obvious.
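[Editorial note] The "very standard idiom" added to README.txt just above is a 32-bit byte swap, so recognizing it would let each backend emit its single-instruction swap. As a hedged aside (not part of the patch), the builtin spelling of the same operation, which GCC and Clang already lower to one bswap on x86:

    unsigned f(unsigned n) {
      /* Same result as the shift-and-mask expression in the README entry. */
      return __builtin_bswap32(n);
    }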
//===---------------------------------------------------------------------===// + +Take the following testcase on x86-64 (similar testcases exist for all targets +with addc/adde): + +define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, +i64 %c) nounwind { +entry: + %0 = zext i64 %a to i128 ; <i128> [#uses=1] + %1 = zext i64 %b to i128 ; <i128> [#uses=1] + %2 = add i128 %1, %0 ; <i128> [#uses=2] + %3 = zext i64 %c to i128 ; <i128> [#uses=1] + %4 = shl i128 %3, 64 ; <i128> [#uses=1] + %5 = add i128 %4, %2 ; <i128> [#uses=1] + %6 = lshr i128 %5, 64 ; <i128> [#uses=1] + %7 = trunc i128 %6 to i64 ; <i64> [#uses=1] + store i64 %7, i64* %s, align 8 + %8 = trunc i128 %2 to i64 ; <i64> [#uses=1] + store i64 %8, i64* %t, align 8 + ret void +} + +Generated code: + addq %rcx, %rdx + movl $0, %eax + adcq $0, %rax + addq %r8, %rax + movq %rax, (%rdi) + movq %rdx, (%rsi) + ret + +Expected code: + addq %rcx, %rdx + adcq $0, %r8 + movq %r8, (%rdi) + movq %rdx, (%rsi) + ret + +The generated SelectionDAG has an ADD of an ADDE, where both operands of the +ADDE are zero. Replacing one of the operands of the ADDE with the other operand +of the ADD, and replacing the ADD with the ADDE, should give the desired result. + +(That said, we are doing a lot better than gcc on this testcase. :) ) + +//===---------------------------------------------------------------------===// + +Switch lowering generates less than ideal code for the following switch: +define void @a(i32 %x) nounwind { +entry: + switch i32 %x, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then + i32 2, label %if.then + i32 3, label %if.then + i32 5, label %if.then + ] +if.then: + tail call void @foo() nounwind + ret void +if.end: + ret void +} +declare void @foo() + +Generated code on x86-64 (other platforms give similar results): +a: + cmpl $5, %edi + ja .LBB0_2 + movl %edi, %eax + movl $47, %ecx + btq %rax, %rcx + jb .LBB0_3 +.LBB0_2: + ret +.LBB0_3: + jmp foo # TAILCALL + +The movl+movl+btq+jb could be simplified to a cmpl+jne. + +Or, if we wanted to be really clever, we could simplify the whole thing to +something like the following, which eliminates a branch: + xorl $1, %edi + cmpl $4, %edi + ja .LBB0_2 + ret +.LBB0_2: + jmp foo # TAILCALL + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index f47e53a..4099a62 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -38,6 +38,7 @@ SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. @@ -66,7 +67,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. 
Flag = Chain.getValue(1); @@ -133,7 +134,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { @@ -146,7 +147,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8); FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); - Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, + Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr, NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } @@ -169,7 +170,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -192,7 +193,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -205,7 +206,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -239,7 +240,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, @@ -262,6 +263,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -283,7 +285,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Count the size of the outgoing arguments. unsigned ArgsSize = 0; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) { + switch (Outs[i].VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown value type!"); case MVT::i1: case MVT::i8: @@ -316,7 +318,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. 
switch (VA.getLocInfo()) { @@ -358,8 +360,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, unsigned ArgOffset = 68; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - SDValue Val = Outs[i].Val; - EVT ObjectVT = Val.getValueType(); + SDValue Val = OutVals[i]; + EVT ObjectVT = Outs[i].VT; SDValue ValToStore(0, 0); unsigned ObjSize; switch (ObjectVT.getSimpleVT().SimpleTy) { @@ -478,7 +480,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); @@ -737,7 +739,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { if (isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->getZExtValue() == 0 && + cast<ConstantSDNode>(RHS)->isNullValue() && CC == ISD::SETNE && ((LHS.getOpcode() == SPISD::SELECT_ICC && LHS.getOperand(3).getOpcode() == SPISD::CMPICC) || @@ -745,8 +747,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) && isa<ConstantSDNode>(LHS.getOperand(0)) && isa<ConstantSDNode>(LHS.getOperand(1)) && - cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 && - cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) { + cast<ConstantSDNode>(LHS.getOperand(0))->isOne() && + cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -759,7 +761,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); @@ -1007,21 +1009,20 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. 
- while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); // copy0MBB: // %FalseValue = ... @@ -1035,11 +1036,11 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 5ebdcac..db39e08 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -86,6 +86,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -94,6 +95,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 8e49eca..3a4c80a 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -109,38 +109,29 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI, unsigned SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond)const{ - // FIXME this should probably take a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL)const{ // Can only insert uncond branches so far. assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); - BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB); + BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB); return 1; } -bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC != SrcRC) { - // Not yet supported! - return false; - } - - if (DestRC == SP::IntRegsRegisterClass) - BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg); - else if (DestRC == SP::FPRegsRegisterClass) - BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg); - else if (DestRC == SP::DFPRegsRegisterClass) - BuildMI(MBB, I, DL, get(Subtarget.isV9() ? 
SP::FMOVD : SP::FpMOVD),DestReg) - .addReg(SrcReg); +void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); else - // Can't copy this register - return false; - - return true; + llvm_unreachable("Impossible reg-to-reg copy"); } void SparcInstrInfo:: @@ -183,61 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Can't load this register from stack slot"); } -MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - bool isFloat = false; - MachineInstr *NewMI = NULL; - switch (MI->getOpcode()) { - case SP::ORrr: - if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0&& - MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) { - if (OpNum == 0) // COPY -> STORE - NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri)) - .addFrameIndex(FI) - .addImm(0) - .addReg(MI->getOperand(2).getReg()); - else // COPY -> LOAD - NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri), - MI->getOperand(0).getReg()) - .addFrameIndex(FI) - .addImm(0); - } - break; - case SP::FMOVS: - isFloat = true; - // FALLTHROUGH - case SP::FMOVD: - if (OpNum == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), - get(isFloat ? SP::STFri : SP::STDFri)) - .addFrameIndex(FI) - .addImm(0) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)); - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), - get(isFloat ? 
SP::LDFri : SP::LDDFri)) - .addReg(DstReg, RegState::Define | - getDeadRegState(isDead) | getUndefRegState(isUndef)) - .addFrameIndex(FI) - .addImm(0); - } - break; - } - - return NewMI; -} - unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>(); diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index a00ba39..1334718 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -68,14 +68,13 @@ public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -89,18 +88,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - unsigned getGlobalBaseReg(MachineFunction *MF) const; }; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 9489580..ddadd51 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -665,7 +665,7 @@ let Defs = [FCC] in { //===----------------------------------------------------------------------===// // V9 Conditional Moves. -let Predicates = [HasV9], isTwoAddress = 1 in { +let Predicates = [HasV9], Constraints = "$T = $dst" in { // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. // FIXME: Add instruction encodings for the JIT some day. def MOVICCrr diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 08373bb8..427cc7f 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -52,13 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } - -const TargetRegisterClass* const* -SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; - return CalleeSavedRegClasses; -} - bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const { return false; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 24d43e3..9f0cda7 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -32,9 +32,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index 90be222..d7ac8f5 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -124,7 +124,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { if (strncmp(Modifier + 7, "even", 4) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32); + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit); else if (strncmp(Modifier + 7, "odd", 3) == 0) Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); else diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index bb2952a..ed290ca 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -670,7 +670,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_even32 : SystemZ::subreg_even); + SystemZ::subreg_32bit : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -754,7 +754,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_even32 : SystemZ::subreg_even); + SystemZ::subreg_32bit : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 76f2901..67f739f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -254,6 +254,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -266,7 +267,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -334,7 +335,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // Create the nodes corresponding to a load from this parameter slot. // Create the frame index object for this incoming parameter... 
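The CreateFixedObject rewrites here and in the PPC and Sparc hunks above all drop the same trailing argument: the old signature was (Size, SPOffset, Immutable, isSS) and the spill-slot flag is removed, leaving (Size, SPOffset, Immutable). The parameter names are recalled from the 2.7-era MachineFrameInfo header, so treat them as an assumption; the shape of the change is:

    // before: MFI->CreateFixedObject(Size, Offset, /*Immutable=*/true,
    //                                /*isSS=*/false);
    int FI = MFI->CreateFixedObject(Size, Offset, /*Immutable=*/true);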
int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, - VA.getLocMemOffset(), true, false); + VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load // from this parameter @@ -372,6 +373,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -402,7 +404,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -464,7 +466,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); @@ -550,6 +552,7 @@ SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location @@ -575,7 +578,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; - SDValue ResValue = Outs[i].Val; + SDValue ResValue = OutVals[i]; assert(VA.isRegLoc() && "Can only return in registers!"); // If this is an 8/16/32-bit value, it is really should be passed promoted @@ -729,14 +732,14 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, SDValue Result; if (!IsPic && !ExtraLoadRequired) { - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); Offset = 0; } else { unsigned char OpFlags = 0; if (ExtraLoadRequired) OpFlags = SystemZII::MO_GOTENT; - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl, @@ -827,16 +830,20 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm(); - BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); F->insert(I, copy0MBB); F->insert(I, copy1MBB); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. - copy1MBB->transferSuccessors(BB); + copy1MBB->splice(copy1MBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + copy1MBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. 
BB->addSuccessor(copy0MBB); BB->addSuccessor(copy1MBB); + BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to copy1MBB @@ -849,11 +856,11 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = copy1MBB; - BuildMI(BB, dl, TII.get(SystemZ::PHI), + BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 94bd906..51d2df3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -98,6 +98,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -126,6 +127,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -134,6 +136,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; const SystemZSubtarget &Subtarget; diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 8c5e905..a658280 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -126,7 +126,7 @@ def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), (implicit PSW)]>; } -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Defs = [PSW] in { let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), @@ -237,7 +237,7 @@ def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), "ddb\t{$dst, $src2}", [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>; -} // isTwoAddress = 1 +} // Constraints = "$src1 = $dst" def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), "sqebr\t{$dst, $src}", diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 043686c..c03864f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -117,59 +117,28 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } -bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - // Determine if DstRC and SrcRC have a common superclass. 
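Here, as in the Sparc file above, the class-matching preamble of copyRegToReg (the removed "common superclass" logic below) has no counterpart in the new hook: copyPhysReg() receives two physical registers, picks an opcode by asking which register class contains both, honors the kill flag, and has no failure path. A minimal sketch for a hypothetical target Foo (names invented for illustration):

    void FooInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
      unsigned Opc;
      if (Foo::GR32RegClass.contains(DestReg, SrcReg))
        Opc = Foo::MOV32rr;        // integer <- integer
      else if (Foo::FP32RegClass.contains(DestReg, SrcReg))
        Opc = Foo::FMOV32rr;       // fp <- fp
      else
        llvm_unreachable("Impossible reg-to-reg copy");
      BuildMI(MBB, I, DL, get(Opc), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    }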
- const TargetRegisterClass *CommonRC = DestRC; - if (DestRC == SrcRC) - /* Same regclass for source and dest */; - else if (CommonRC->hasSuperClass(SrcRC)) - CommonRC = SrcRC; - else if (!CommonRC->hasSubClass(SrcRC)) - CommonRC = 0; - - if (CommonRC) { - if (CommonRC == &SystemZ::GR64RegClass || - CommonRC == &SystemZ::ADDR64RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR32RegClass || - CommonRC == &SystemZ::ADDR32RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR64PRegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rrP), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR128RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV128rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::FP32RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::FMOV32rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::FP64RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::FMOV64rr), DestReg).addReg(SrcReg); - } else { - return false; - } - - return true; - } - - if ((SrcRC == &SystemZ::GR64RegClass && - DestRC == &SystemZ::ADDR64RegClass) || - (DestRC == &SystemZ::GR64RegClass && - SrcRC == &SystemZ::ADDR64RegClass)) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); - return true; - } else if ((SrcRC == &SystemZ::GR32RegClass && - DestRC == &SystemZ::ADDR32RegClass) || - (DestRC == &SystemZ::GR32RegClass && - SrcRC == &SystemZ::ADDR32RegClass)) { - BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); - return true; - } - - return false; +void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (SystemZ::GR64RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV64rr; + else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV32rr; + else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV64rrP; + else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV128rr; + else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::FMOV32rr; + else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::FMOV64rr; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool @@ -286,8 +255,7 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass != &SystemZ::FP64RegClass) { + if (!SystemZ::FP64RegClass.contains(Reg)) { unsigned Offset = RegSpillOffsets[Reg]; CalleeFrameSize += 8; if (StartOffset > Offset) { @@ -332,11 +300,10 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Save FPRs for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass == &SystemZ::FP64RegClass) { + if (SystemZ::FP64RegClass.contains(Reg)) { MBB.addLiveIn(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass, - &RI); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), + &SystemZ::FP64RegClass, &RI); } } @@ -361,9 +328,9 @@ 
SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Restore FP registers for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass == &SystemZ::FP64RegClass) - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI); + if (SystemZ::FP64RegClass.contains(Reg)) + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + &SystemZ::FP64RegClass, &RI); } // Restore GP registers @@ -523,9 +490,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME: this should probably have a DebugLoc operand - DebugLoc DL; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index a753f14..0559619 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -60,11 +60,10 @@ public: /// virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; } - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, @@ -102,7 +101,8 @@ public: bool AllowModify) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 22bde4e..8df07c0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -478,7 +478,8 @@ def MOV64rmm : RSYI<0x04EB, "lmg\t{$from, $to, $dst}", []>; -let isReMaterializable = 1, isAsCheapAsAMove = 1, isTwoAddress = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, + Constraints = "$src = $dst" in { def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), "lhi\t${dst:subreg_even}, 0", []>; @@ -537,7 +538,7 @@ def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src), (implicit PSW)]>; } -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Defs = [PSW] in { @@ -924,12 +925,12 @@ def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2 "dlg\t{$dst, $src2}", []>; } // mayLoad -} // isTwoAddress = 1 +} // Constraints = "$src1 = $dst" //===----------------------------------------------------------------------===// // Shifts -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SRL32rri : RSI<0x88, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "srl\t{$src, $amt}", @@ -939,7 +940,7 @@ def SRL64rri : RSYI<0xEB0C, "srlg\t{$dst, $src, $amt}", [(set GR64:$dst, (srl GR64:$src, 
riaddr:$amt))]>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SHL32rri : RSI<0x89, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "sll\t{$src, $amt}", @@ -950,7 +951,7 @@ def SHL64rri : RSYI<0xEB0D, [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>; let Defs = [PSW] in { -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SRA32rri : RSI<0x8A, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "sra\t{$src, $amt}", @@ -1129,13 +1130,13 @@ def : Pat<(mulhs GR32:$src1, GR32:$src2), (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GR32:$src1, subreg_odd32), GR32:$src2), - subreg_even32)>; + subreg_32bit)>; def : Pat<(mulhu GR32:$src1, GR32:$src2), (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GR32:$src1, subreg_odd32), GR32:$src2), - subreg_even32)>; + subreg_32bit)>; def : Pat<(mulhu GR64:$src1, GR64:$src2), (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src1, subreg_odd), diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 638fd17..ae96b0b 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -47,22 +47,6 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -const TargetRegisterClass* const* -SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0 - }; - return CalleeSavedRegClasses; -} - BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); if (hasFP(MF)) diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 42aa5dd..670025f 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -32,9 +32,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const { return true; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index b561744..33be8dd 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -55,7 +55,6 @@ class FPRL<bits<4> num, string n, list<Register> subregs> let Namespace = "SystemZ" in { def subreg_32bit : SubRegIndex; -def subreg_even32 : SubRegIndex; def subreg_odd32 : SubRegIndex; def subreg_even : SubRegIndex; def subreg_odd : SubRegIndex; @@ -99,7 +98,7 @@ def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>; } // Register pairs -let SubRegIndices = [subreg_even32, subreg_odd32] in { +let SubRegIndices = [subreg_32bit, subreg_odd32] in { def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>; def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>; def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>; @@ -111,8 +110,7 @@ def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>; } let SubRegIndices = [subreg_even, subreg_odd], - CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit), - (subreg_odd32 subreg_odd, subreg_32bit)] in { + CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in { def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>; def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>; def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>; @@ -355,7 +353,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, def GR64P : RegisterClass<"SystemZ", [v2i32], 64, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> { - let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)]; + let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -391,7 +389,7 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64, def GR128 : RegisterClass<"SystemZ", [v2i64], 128, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> { - let SubRegClasses = [(GR32 subreg_even32, subreg_odd32), + let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32), (GR64 subreg_even, subreg_odd)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 094a57e..c099a7e 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -28,6 +28,10 @@ const TargetRegisterClass * TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const { if (isLookupPtrRegClass()) return TRI->getPointerRegClass(RegClass); + // Instructions like INSERT_SUBREG do not have fixed register classes. + if (RegClass < 0) + return 0; + // Otherwise just look it up normally. 
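// (Callers must now tolerate a null result from getRegClass(); a typical
//  guard, assuming TID is the instruction's TargetInstrDesc -- illustrative
//  only:
//    const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
//    if (!RC) { /* no fixed class, e.g. INSERT_SUBREG's index operand */ }
// )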
return TRI->getRegClass(RegClass); } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index b9372d0..dd7b532 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -101,7 +101,7 @@ static bool IsNullTerminatedString(const Constant *C) { ConstantInt *Null = dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1)); - if (Null == 0 || Null->getZExtValue() != 0) + if (Null == 0 || !Null->isZero()) return false; // Not null terminated. // Verify that the null doesn't occur anywhere else in the string. diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index dcc5f61..49bfad5 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -39,20 +39,20 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, TargetRegisterInfo::~TargetRegisterInfo() {} -/// getPhysicalRegisterRegClass - Returns the Register Class of a physical -/// register of the given type. If type is EVT::Other, then just return any -/// register class the register belongs to. +/// getMinimalPhysRegClass - Returns the Register Class of a physical +/// register of the given type, picking the most sub register class of +/// the right type that contains this physreg. const TargetRegisterClass * -TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { +TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { assert(isPhysicalRegister(reg) && "reg must be a physical register"); - // Pick the most super register class of the right type that contains + // Pick the most sub register class of the right type that contains // this physreg. const TargetRegisterClass* BestRC = 0; for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){ const TargetRegisterClass* RC = *I; if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || BestRC->hasSuperClass(RC))) + (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index a58f58e..26797ab 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -33,13 +33,11 @@ class X86AsmLexer : public TargetAsmLexer { } const AsmToken &lexDefinite() { - if(tentativeIsValid) { + if (tentativeIsValid) { tentativeIsValid = false; return tentativeToken; } - else { - return getLexer()->Lex(); - } + return getLexer()->Lex(); } AsmToken LexTokenATT(); @@ -72,38 +70,65 @@ public: static unsigned MatchRegisterName(StringRef Name); AsmToken X86AsmLexer::LexTokenATT() { - const AsmToken lexedToken = lexDefinite(); + AsmToken lexedToken = lexDefinite(); switch (lexedToken.getKind()) { default: - return AsmToken(lexedToken); + return lexedToken; case AsmToken::Error: SetError(Lexer->getErrLoc(), Lexer->getErr()); - return AsmToken(lexedToken); - case AsmToken::Percent: - { + return lexedToken; + + case AsmToken::Percent: { const AsmToken &nextToken = lexTentative(); - if (nextToken.getKind() == AsmToken::Identifier) { - unsigned regID = MatchRegisterName(nextToken.getString()); + if (nextToken.getKind() != AsmToken::Identifier) + return lexedToken; + - if (regID) { - lexDefinite(); + if (unsigned regID = MatchRegisterName(nextToken.getString())) { + lexDefinite(); + // FIXME: This is completely wrong when there is a space or other + // punctuation between the % and the register name. 
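// (Concretely: regStr below is synthesized by extending the '%' token's
//  buffer in place -- StringRef(start of "%", len("%") + len(name)) -- which
//  only names the intended "%reg" text when the two tokens are adjacent in
//  the source buffer; any intervening whitespace lands inside the slice and
//  pushes the tail of the register name out of it.)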
+ StringRef regStr(lexedToken.getString().data(), + lexedToken.getString().size() + + nextToken.getString().size()); + + return AsmToken(AsmToken::Register, regStr, + static_cast<int64_t>(regID)); + } + + // Match register name failed. If this is "db[0-7]", match it as an alias + // for dr[0-7]. + if (nextToken.getString().size() == 3 && + nextToken.getString().startswith("db")) { + int RegNo = -1; + switch (nextToken.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + } + + if (RegNo != -1) { + lexDefinite(); + + // FIXME: This is completely wrong when there is a space or other + // punctuation between the % and the register name. StringRef regStr(lexedToken.getString().data(), lexedToken.getString().size() + nextToken.getString().size()); - - return AsmToken(AsmToken::Register, - regStr, - static_cast<int64_t>(regID)); - } - else { - return AsmToken(lexedToken); + return AsmToken(AsmToken::Register, regStr, + static_cast<int64_t>(RegNo)); } } - else { - return AsmToken(lexedToken); - } + + + return lexedToken; } } } @@ -113,26 +138,22 @@ AsmToken X86AsmLexer::LexTokenIntel() { switch(lexedToken.getKind()) { default: - return AsmToken(lexedToken); + return lexedToken; case AsmToken::Error: SetError(Lexer->getErrLoc(), Lexer->getErr()); - return AsmToken(lexedToken); - case AsmToken::Identifier: - { + return lexedToken; + case AsmToken::Identifier: { std::string upperCase = lexedToken.getString().str(); std::string lowerCase = LowercaseString(upperCase); StringRef lowerRef(lowerCase); unsigned regID = MatchRegisterName(lowerRef); - if (regID) { + if (regID) return AsmToken(AsmToken::Register, lexedToken.getString(), static_cast<int64_t>(regID)); - } - else { - return AsmToken(lexedToken); - } + return lexedToken; } } } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 40a6a7b..a856e9c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -412,6 +412,28 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, return false; } + // If this is "db[0-7]", match it as an alias + // for dr[0-7]. + if (RegNo == 0 && Tok.getString().size() == 3 && + Tok.getString().startswith("db")) { + switch (Tok.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + } + + if (RegNo != 0) { + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat it. + return false; + } + } + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -597,6 +619,16 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); } + // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and + // the form jrcxz is not allowed in 32-bit mode. + if (Is64Bit) { + if (Name == "jcxz") + return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode"); + } else { + if (Name == "jrcxz") + return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode"); + } + // FIXME: Hack to recognize "sal..." and "rep..." for now. 
We need a way to // represent alternative syntaxes in the .td file, without requiring // instruction duplication. @@ -617,6 +649,23 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, .Case("setnz", "setne") .Case("jz", "je") .Case("jnz", "jne") + .Case("jc", "jb") + // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode + // jecxz requires an AdSize prefix but jecxz does not have a prefix in + // 32-bit mode. + .Case("jecxz", "jcxz") + .Case("jrcxz", "jcxz") + .Case("jna", "jbe") + .Case("jnae", "jb") + .Case("jnb", "jae") + .Case("jnbe", "ja") + .Case("jnc", "jae") + .Case("jng", "jle") + .Case("jnge", "jl") + .Case("jnl", "jge") + .Case("jnle", "jg") + .Case("jpe", "jp") + .Case("jpo", "jnp") .Case("cmovcl", "cmovbl") .Case("cmovcl", "cmovbl") .Case("cmovnal", "cmovbel") @@ -631,36 +680,64 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, .Case("cmovnlel", "cmovgl") .Case("cmovnzl", "cmovnel") .Case("cmovzl", "cmovel") + .Case("fwait", "wait") + .Case("movzx", "movzb") .Default(Name); // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; - if (PatchedName.startswith("cmp") && + if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { + bool IsVCMP = PatchedName.startswith("vcmp"); + unsigned SSECCIdx = IsVCMP ? 4 : 3; unsigned SSEComparisonCode = StringSwitch<unsigned>( - PatchedName.slice(3, PatchedName.size() - 2)) - .Case("eq", 0) - .Case("lt", 1) - .Case("le", 2) - .Case("unord", 3) - .Case("neq", 4) - .Case("nlt", 5) - .Case("nle", 6) - .Case("ord", 7) + PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) + .Case("eq", 0) + .Case("lt", 1) + .Case("le", 2) + .Case("unord", 3) + .Case("neq", 4) + .Case("nlt", 5) + .Case("nle", 6) + .Case("ord", 7) + .Case("eq_uq", 8) + .Case("nge", 9) + .Case("ngt", 0x0A) + .Case("false", 0x0B) + .Case("neq_oq", 0x0C) + .Case("ge", 0x0D) + .Case("gt", 0x0E) + .Case("true", 0x0F) + .Case("eq_os", 0x10) + .Case("lt_oq", 0x11) + .Case("le_oq", 0x12) + .Case("unord_s", 0x13) + .Case("neq_us", 0x14) + .Case("nlt_uq", 0x15) + .Case("nle_uq", 0x16) + .Case("ord_s", 0x17) + .Case("eq_us", 0x18) + .Case("nge_uq", 0x19) + .Case("ngt_uq", 0x1A) + .Case("false_os", 0x1B) + .Case("neq_os", 0x1C) + .Case("ge_oq", 0x1D) + .Case("gt_oq", 0x1E) + .Case("true_us", 0x1F) .Default(~0U); if (SSEComparisonCode != ~0U) { ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, getParser().getContext()); if (PatchedName.endswith("ss")) { - PatchedName = "cmpss"; + PatchedName = IsVCMP ? "vcmpss" : "cmpss"; } else if (PatchedName.endswith("sd")) { - PatchedName = "cmpsd"; + PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; } else if (PatchedName.endswith("ps")) { - PatchedName = "cmpps"; + PatchedName = IsVCMP ? "vcmpps" : "cmpps"; } else { assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); - PatchedName = "cmppd"; + PatchedName = IsVCMP ? 
"vcmppd" : "cmppd"; } } } diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 0b64cb4..f2cdb5b 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -85,11 +85,18 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &BaseReg = MI->getOperand(Op); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } if (DispSpec.isImm()) { int64_t DispVal = DispSpec.getImm(); @@ -115,13 +122,3 @@ void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, O << ')'; } } - -void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - // If this has a segment register, print it. - if (MI->getOperand(Op+4).getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - printLeaMemReference(MI, Op, O); -} diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 8d5d508..3be4bae 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -34,7 +34,6 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); - void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS); void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); @@ -69,14 +68,8 @@ public: void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } - void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); - } - void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); - } - void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); } }; diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 183213d..73bc603 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -200,6 +200,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_GOT: O << "@GOT"; break; case X86II::MO_GOTOFF: O << "@GOTOFF"; break; case X86II::MO_PLT: O << "@PLT"; break; + case X86II::MO_TLVP: O << "@TLVP"; break; + case X86II::MO_TLVP_PIC_BASE: + O << "@TLVP" << '-'; + PrintPICBaseSymbol(O); + break; } } @@ -383,6 +388,8 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { printSymbolOperand(MO, O); + if (Subtarget->isPICStyleRIPRel()) + O << "(%rip)"; return false; } if (MO.isReg()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 7e0a9bb..a632047 100644 --- 
a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -81,12 +81,19 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &BaseReg = MI->getOperand(Op); unsigned ScaleVal = MI->getOperand(Op+1).getImm(); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } O << '['; @@ -104,7 +111,7 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, NeedPlus = true; } - + if (!DispSpec.isImm()) { if (NeedPlus) O << " + "; assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); @@ -126,13 +133,3 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, O << ']'; } - -void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - // If this has a segment register, print it. - if (MI->getOperand(Op+4).getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - printLeaMemReference(MI, Op, O); -} diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index a0beeb2..4d68074 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -36,7 +36,6 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); - void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -81,17 +80,9 @@ public: O << "XMMWORD PTR "; printMemReference(MI, OpNo, O); } - void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "DWORD PTR "; - printLeaMemReference(MI, OpNo, O); - } - void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printLeaMemReference(MI, OpNo, O); - } - void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printLeaMemReference(MI, OpNo, O); + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); } }; diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 4edeca9..09f150b 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -152,6 +152,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_STUB: break; + case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; + case X86II::MO_TLVP_PIC_BASE: + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + // Subtract the pic base. 
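// (MO_TLVP_PIC_BASE covers the non-RIP-relative case: without rip-relative
//  addressing, the thread-local descriptor has to be addressed relative to
//  the function's PIC base label, so the operand lowers to the difference
//  (sym@TLVP) - (picbase), e.g. "_x@TLVP-L0$pb" in the emitted assembly
//  (symbol and label names illustrative), built via the CreateSub below.)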
+ Expr + = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), + Ctx), + Ctx); + + break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; @@ -266,10 +277,21 @@ static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { return; // Check whether this is an absolute address. - if (Inst.getOperand(AddrBase + 0).getReg() != 0 || - Inst.getOperand(AddrBase + 2).getReg() != 0 || - Inst.getOperand(AddrBase + 4).getReg() != 0 || - Inst.getOperand(AddrBase + 1).getImm() != 1) + // FIXME: We know TLVP symbol refs aren't, but there should be a better way + // to do this here. + bool Absolute = true; + if (Inst.getOperand(AddrOp).isExpr()) { + const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr(); + if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE)) + if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP) + Absolute = false; + } + + if (Absolute && + (Inst.getOperand(AddrBase + 0).getReg() != 0 || + Inst.getOperand(AddrBase + 2).getReg() != 0 || + Inst.getOperand(AddrBase + 4).getReg() != 0 || + Inst.getOperand(AddrBase + 1).getImm() != 1)) return; // If so, rewrite the instruction. @@ -327,6 +349,15 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { switch (OutMI.getOpcode()) { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. lower_lea64_32mem(&OutMI, 1); + // FALL THROUGH. + case X86::LEA64r: + case X86::LEA16r: + case X86::LEA32r: + // LEA should have a segment register, but it must be empty. + assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands && + "Unexpected # of LEA operands"); + assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && + "LEA has segment specified!"); break; case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; @@ -364,10 +395,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have // register inputs modeled as normal uses instead of implicit uses. As such, // truncate off all but the first operand (the callee). FIXME: Change isel. - case X86::TAILJMPr: case X86::TAILJMPr64: case X86::CALL64r: case X86::CALL64pcrel32: { @@ -380,11 +410,20 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. + case X86::TAILJMPr: case X86::TAILJMPd: case X86::TAILJMPd64: { + unsigned Opcode; + switch (OutMI.getOpcode()) { + default: assert(0 && "Invalid opcode"); + case X86::TAILJMPr: Opcode = X86::JMP32r; break; + case X86::TAILJMPd: + case X86::TAILJMPd64: Opcode = X86::JMP_1; break; + } + MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); - OutMI.setOpcode(X86::TAILJMP_1); + OutMI.setOpcode(Opcode); OutMI.addOperand(Saved); break; } @@ -483,8 +522,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. 
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << '['; + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) + printOperand(MI, 0, O); + else + O << "undef"; + O << '+'; printOperand(MI, 3, O); O << ']'; O << "+"; printOperand(MI, NOps-2, O); @@ -495,8 +538,9 @@ X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { MachineLocation Location; assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + + if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); return Location; } @@ -513,6 +557,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; + case X86::TAILJMPr: + case X86::TAILJMPd: + case X86::TAILJMPd64: + // Lower these as normal, but add some comments. + OutStreamer.AddComment("TAILCALL"); + break; + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -578,7 +629,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - OutStreamer.EmitInstruction(TmpInst); } diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 9f91060..97589c0 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -4,8 +4,8 @@ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp X86DisassemblerDecoder.c ) -# workaround for hanging compilation on MSVC9 -if( MSVC_VERSION EQUAL 1500 ) +# workaround for hanging compilation on MSVC9 and 10 +if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( SOURCE X86Disassembler.cpp PROPERTY COMPILE_FLAGS "/Od" diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8a5a630..09f1584 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -252,13 +252,8 @@ static bool translateRMRegister(MCInst &mcInst, /// @param mcInst - The MCInst to append to. /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. -/// @param sr - Whether or not to emit the segment register. The -/// LEA instruction does not expect a segment-register -/// operand. /// @return - 0 on success; nonzero otherwise -static bool translateRMMemory(MCInst &mcInst, - InternalInstruction &insn, - bool sr) { +static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { // Addresses in an MCInst are represented as five operands: // 1. 
basereg (register) The R/M base, or (if there is a SIB) the // SIB base @@ -385,10 +380,7 @@ static bool translateRMMemory(MCInst &mcInst, mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); mcInst.addOperand(displacement); - - if (sr) - mcInst.addOperand(segmentReg); - + mcInst.addOperand(segmentReg); return false; } @@ -439,9 +431,8 @@ static bool translateRM(MCInst &mcInst, case TYPE_M1616: case TYPE_M1632: case TYPE_M1664: - return translateRMMemory(mcInst, insn, true); case TYPE_LEA: - return translateRMMemory(mcInst, insn, false); + return translateRMMemory(mcInst, insn); } } diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index e5f84e8..b6aba93 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -36,62 +36,6 @@ The pattern isel got this one right. //===---------------------------------------------------------------------===// -SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction -like this: - - X += y - -and the register allocator decides to spill X, it is cheaper to emit this as: - -Y += [xslot] -store Y -> [xslot] - -than as: - -tmp = [xslot] -tmp += y -store tmp -> [xslot] - -..and this uses one fewer register (so this should be done at load folding -time, not at spiller time). *Note* however that this can only be done -if Y is dead. Here's a testcase: - -@.str_3 = external global [15 x i8] -declare void @printf(i32, ...) -define void @main() { -build_tree.exit: - br label %no_exit.i7 - -no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit - %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], - [ %tmp.34.i18, %no_exit.i7 ] - %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], - [ %tmp.28.i16, %no_exit.i7 ] - %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00 - %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00 - br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 - -Compute_Tree.exit23: ; preds = %no_exit.i7 - tail call void (i32, ...)* @printf( i32 0 ) - store double %tmp.34.i18, double* null - ret void -} - -We currently emit: - -.BBmain_1: - xorpd %XMM1, %XMM1 - addsd %XMM0, %XMM1 -*** movsd %XMM2, QWORD PTR [%ESP + 8] -*** addsd %XMM2, %XMM1 -*** movsd QWORD PTR [%ESP + 8], %XMM2 - jmp .BBmain_1 # no_exit.i7 - -This is a bugpoint reduced testcase, which is why the testcase doesn't make -much sense (e.g. its an infinite loop). :) - -//===---------------------------------------------------------------------===// - SSE should implement 'select_cc' using 'emulated conditional moves' that use pcmp/pand/pandn/por to do a selection instead of a conditional branch: @@ -122,12 +66,6 @@ LBB_X_2: //===---------------------------------------------------------------------===// -It's not clear whether we should use pxor or xorps / xorpd to clear XMM -registers. The choice may depend on subtarget information. We should do some -more experiments on different x86 machines. - -//===---------------------------------------------------------------------===// - Lower memcpy / memset to a series of SSE 128 bit move instructions when it's feasible. @@ -151,45 +89,6 @@ Perhaps use pxor / xorp* to clear a XMM register first? //===---------------------------------------------------------------------===// -How to decide when to use the "floating point version" of logical ops? 
Here are -some code fragments: - - movaps LCPI5_5, %xmm2 - divps %xmm1, %xmm2 - mulps %xmm2, %xmm3 - mulps 8656(%ecx), %xmm3 - addps 8672(%ecx), %xmm3 - andps LCPI5_6, %xmm2 - andps LCPI5_1, %xmm3 - por %xmm2, %xmm3 - movdqa %xmm3, (%edi) - - movaps LCPI5_5, %xmm1 - divps %xmm0, %xmm1 - mulps %xmm1, %xmm3 - mulps 8656(%ecx), %xmm3 - addps 8672(%ecx), %xmm3 - andps LCPI5_6, %xmm1 - andps LCPI5_1, %xmm3 - orps %xmm1, %xmm3 - movaps %xmm3, 112(%esp) - movaps %xmm3, (%ebx) - -Due to some minor source change, the later case ended up using orps and movaps -instead of por and movdqa. Does it matter? - -//===---------------------------------------------------------------------===// - -X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible -to choose between movaps, movapd, and movdqa based on types of source and -destination? - -How about andps, andpd, and pand? Do we really care about the type of the packed -elements? If not, why not always use the "ps" variants which are likely to be -shorter. - -//===---------------------------------------------------------------------===// - External test Nurbs exposed some problems. Look for __ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc emits: @@ -278,41 +177,6 @@ It also exposes some other problems. See MOV32ri -3 and the spills. //===---------------------------------------------------------------------===// -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500 - -LLVM is producing bad code. - -LBB_main_4: # cond_true44 - addps %xmm1, %xmm2 - subps %xmm3, %xmm2 - movaps (%ecx), %xmm4 - movaps %xmm2, %xmm1 - addps %xmm4, %xmm1 - addl $16, %ecx - incl %edx - cmpl $262144, %edx - movaps %xmm3, %xmm2 - movaps %xmm4, %xmm3 - jne LBB_main_4 # cond_true44 - -There are two problems. 1) No need to two loop induction variables. We can -compare against 262144 * 16. 2) Known register coalescer issue. We should -be able eliminate one of the movaps: - - addps %xmm2, %xmm1 <=== Commute! - subps %xmm3, %xmm1 - movaps (%ecx), %xmm4 - movaps %xmm1, %xmm1 <=== Eliminate! - addps %xmm4, %xmm1 - addl $16, %ecx - incl %edx - cmpl $262144, %edx - movaps %xmm3, %xmm2 - movaps %xmm4, %xmm3 - jne LBB_main_4 # cond_true44 - -//===---------------------------------------------------------------------===// - Consider: __m128 test(float a) { @@ -382,22 +246,6 @@ elements are fixed zeros. //===---------------------------------------------------------------------===// -__m128d test1( __m128d A, __m128d B) { - return _mm_shuffle_pd(A, B, 0x3); -} - -compiles to - -shufpd $3, %xmm1, %xmm0 - -Perhaps it's better to use unpckhpd instead? - -unpckhpd %xmm1, %xmm0 - -Don't know if unpckhpd is faster. But it is shorter. - -//===---------------------------------------------------------------------===// - This code generates ugly code, probably due to costs being off or something: define void @test(float* %P, <4 x float>* %P2 ) { @@ -549,6 +397,7 @@ entry: %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly ret i64 %tmp20 } +declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly This currently compiles to: @@ -987,3 +836,34 @@ This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and doing a shuffle from v[1] to v[0] then a float store. 
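The XMM-domain alternative described above can be written directly with SSE
intrinsics; a minimal sketch (the function name is illustrative, not from the
testcase):

#include <xmmintrin.h>

// Move lane 1 of v down to lane 0 without leaving the XMM unit, then do a
// scalar float store -- no GPR round trip and no extra stack traffic.
void store_lane1(__m128 v, float *p) {
  __m128 t = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
  _mm_store_ss(p, t);
}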
//===---------------------------------------------------------------------===// + +On SSE4 machines, we compile this code: + +define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, + <2 x float> *%P) nounwind { + %Z = fadd <2 x float> %Q, %R + + store <2 x float> %Z, <2 x float> *%P + ret <2 x float> %Z +} + +into: + +_test2: ## @test2 +## BB#0: + insertps $0, %xmm2, %xmm2 + insertps $16, %xmm3, %xmm2 + insertps $0, %xmm0, %xmm3 + insertps $16, %xmm1, %xmm3 + addps %xmm2, %xmm3 + movq %xmm3, (%rdi) + movaps %xmm3, %xmm0 + pshufd $1, %xmm3, %xmm1 + ## kill: XMM1<def> XMM1<kill> + ret + +The insertps's of $0 are pointless complex copies. + +//===---------------------------------------------------------------------===// + + diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index e8f7c5d..78c4dc0 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -1,27 +1,5 @@ //===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// -Implement different PIC models? Right now we only support Mac OS X with small -PIC code model. - -//===---------------------------------------------------------------------===// - -For this: - -extern void xx(void); -void bar(void) { - xx(); -} - -gcc compiles to: - -.globl _bar -_bar: - jmp _xx - -We need to do the tailcall optimization as well. - -//===---------------------------------------------------------------------===// - AMD64 Optimization Manual 8.2 has some nice information about optimizing integer multiplication by a constant. How much of it applies to Intel's X86-64 implementation? There are definite trade-offs to consider: latency vs. register @@ -96,123 +74,14 @@ gcc: movq %rax, (%rdx) ret -//===---------------------------------------------------------------------===// - -Vararg function prologue can be further optimized. Currently all XMM registers -are stored into register save area. Most of them can be eliminated since the -upper bound of the number of XMM registers used are passed in %al. gcc produces -something like the following: - - movzbl %al, %edx - leaq 0(,%rdx,4), %rax - leaq 4+L2(%rip), %rdx - leaq 239(%rsp), %rax - jmp *%rdx - movaps %xmm7, -15(%rax) - movaps %xmm6, -31(%rax) - movaps %xmm5, -47(%rax) - movaps %xmm4, -63(%rax) - movaps %xmm3, -79(%rax) - movaps %xmm2, -95(%rax) - movaps %xmm1, -111(%rax) - movaps %xmm0, -127(%rax) -L2: - -It jumps over the movaps that do not need to be stored. Hard to see this being -significant as it added 5 instruciton (including a indirect branch) to avoid -executing 0 to 8 stores in the function prologue. - -Perhaps we can optimize for the common case where no XMM registers are used for -parameter passing. i.e. is %al == 0 jump over all stores. Or in the case of a -leaf function where we can determine that no XMM input parameter is need, avoid -emitting the stores at all. - -//===---------------------------------------------------------------------===// +And the codegen is even worse for the following +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103): + void fill1(char *s, int a) + { + __builtin_memset(s, a, 15); + } -AMD64 has a complex calling convention for aggregate passing by value: - -1. If the size of an object is larger than two eightbytes, or in C++, is a non- - POD structure or union type, or contains unaligned fields, it has class - MEMORY. -2. Both eightbytes get initialized to class NO_CLASS. -3. Each field of an object is classified recursively so that always two fields - are considered. 
The resulting class is calculated according to the classes - of the fields in the eightbyte: - (a) If both classes are equal, this is the resulting class. - (b) If one of the classes is NO_CLASS, the resulting class is the other - class. - (c) If one of the classes is MEMORY, the result is the MEMORY class. - (d) If one of the classes is INTEGER, the result is the INTEGER. - (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as - class. - (f) Otherwise class SSE is used. -4. Then a post merger cleanup is done: - (a) If one of the classes is MEMORY, the whole argument is passed in memory. - (b) If SSEUP is not preceeded by SSE, it is converted to SSE. - -Currently llvm frontend does not handle this correctly. - -Problem 1: - typedef struct { int i; double d; } QuadWordS; -It is currently passed in two i64 integer registers. However, gcc compiled -callee expects the second element 'd' to be passed in XMM0. - -Problem 2: - typedef struct { int32_t i; float j; double d; } QuadWordS; -The size of the first two fields == i64 so they will be combined and passed in -a integer register RDI. The third field is still passed in XMM0. - -Problem 3: - typedef struct { int64_t i; int8_t j; int64_t d; } S; - void test(S s) -The size of this aggregate is greater than two i64 so it should be passed in -memory. Currently llvm breaks this down and passed it in three integer -registers. - -Problem 4: -Taking problem 3 one step ahead where a function expects a aggregate value -in memory followed by more parameter(s) passed in register(s). - void test(S s, int b) - -LLVM IR does not allow parameter passing by aggregates, therefore it must break -the aggregates value (in problem 3 and 4) into a number of scalar values: - void %test(long %s.i, byte %s.j, long %s.d); - -However, if the backend were to lower this code literally it would pass the 3 -values in integer registers. To force it be passed in memory, the frontend -should change the function signiture to: - void %test(long %undef1, long %undef2, long %undef3, long %undef4, - long %undef5, long %undef6, - long %s.i, byte %s.j, long %s.d); -And the callee would look something like this: - call void %test( undef, undef, undef, undef, undef, undef, - %tmp.s.i, %tmp.s.j, %tmp.s.d ); -The first 6 undef parameters would exhaust the 6 integer registers used for -parameter passing. The following three integer values would then be forced into -memory. - -For problem 4, the parameter 'd' would be moved to the front of the parameter -list so it will be passed in register: - void %test(int %d, - long %undef1, long %undef2, long %undef3, long %undef4, - long %undef5, long %undef6, - long %s.i, byte %s.j, long %s.d); - -//===---------------------------------------------------------------------===// - -Right now the asm printer assumes GlobalAddress are accessed via RIP relative -addressing. Therefore, it is not possible to generate this: - movabsq $__ZTV10polynomialIdE+16, %rax - -That is ok for now since we currently only support small model. So the above -is selected as - leaq __ZTV10polynomialIdE+16(%rip), %rax - -This is probably slightly slower but is much shorter than movabsq. However, if -we were to support medium or larger code models, we need to use the movabs -instruction. We should probably introduce something like AbsoluteAddress to -distinguish it from GlobalAddress so the asm printer and JIT code emitter can -do the right thing. +For this version, we duplicate the computation of the constant to store. 
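The constant in question is the splatted fill byte; a variable-value memset of
15 bytes can be lowered to an 8-, 4-, 2- and 1-byte store of truncations of one
wide value, sketched here as an illustrative helper (not the actual lowering
code):

#include <stdint.h>

// Splat the fill byte across a 64-bit word; 8+4+2+1 stores of truncations
// of this value cover the 15 bytes. Computing it twice is the duplication
// noted above.
static inline uint64_t SplatByte(uint8_t a) {
  return (uint64_t)a * 0x0101010101010101ULL;
}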
//===---------------------------------------------------------------------===// @@ -298,3 +167,107 @@ be able to recognize the zero extend. This could also presumably be implemented if we have whole-function selectiondags. //===---------------------------------------------------------------------===// + +Take the following C code +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640): + +struct u1 +{ + float x; + float y; +}; + +float foo(struct u1 u) +{ + return u.x + u.y; +} + +Optimizes to the following IR: +define float @foo(double %u.0) nounwind readnone { +entry: + %tmp8 = bitcast double %u.0 to i64 ; <i64> [#uses=2] + %tmp6 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1] + %tmp7 = bitcast i32 %tmp6 to float ; <float> [#uses=1] + %tmp2 = lshr i64 %tmp8, 32 ; <i64> [#uses=1] + %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp3 to float ; <float> [#uses=1] + %0 = fadd float %tmp7, %tmp4 ; <float> [#uses=1] + ret float %0 +} + +And current llvm-gcc/clang output: + movd %xmm0, %rax + movd %eax, %xmm1 + shrq $32, %rax + movd %eax, %xmm0 + addss %xmm1, %xmm0 + ret + +We really shouldn't move the floats to RAX, only to immediately move them +straight back to the XMM registers. + +There really isn't any good way to handle this purely in IR optimizers; it +could possibly be handled by changing the output of the frontend, though. It +would also be feasible to add an x86-specific DAGCombine to optimize the +bitcast+trunc+(lshr+)bitcast combination. + +//===---------------------------------------------------------------------===// + +Take the following code +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653): +extern unsigned long table[]; +unsigned long foo(unsigned char *p) { + unsigned long tag = *p; + return table[tag >> 4] + table[tag & 0xf]; +} + +Current code generated: + movzbl (%rdi), %eax + movq %rax, %rcx + andq $240, %rcx + shrq %rcx + andq $15, %rax + movq table(,%rax,8), %rax + addq table(%rcx), %rax + ret + +Issues: +1. First movq should be movl; saves a byte. +2. Both andq's should be andl; saves another two bytes. I think this was + implemented at one point, but subsequently regressed. +3. shrq should be shrl; saves another byte. +4. The first andq can be completely eliminated by using a slightly more + expensive addressing mode. + +//===---------------------------------------------------------------------===// + +Consider the following (contrived testcase, but contains common factors): + +#include <stdarg.h> +int test(int x, ...) { + int sum, i; + va_list l; + va_start(l, x); + for (i = 0; i < x; i++) + sum += va_arg(l, int); + va_end(l); + return sum; +} + +Testcase given in C because fixing it will likely involve changing the IR +generated for it. The primary issue with the result is that it doesn't do any +of the optimizations which are possible if we know the address of a va_list +in the current function is never taken: +1. We shouldn't spill the XMM registers because we only call va_arg with "int". +2. It would be nice if we could scalarrepl the va_list. +3. Probably overkill, but it'd be cool if we could peel off the first five +iterations of the loop. + +Other optimizations involving functions which use va_arg on floats which don't +have the address of a va_list taken: +1. Conversely to the above, we shouldn't spill general registers if we only + call va_arg on "double". +2. If we know nothing more than 64 bits wide is read from the XMM registers, + we can change the spilling code to reduce the amount of stack used by half.
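For context on points 1 and 2, the SysV x86-64 va_list is a concrete
four-field record (field names per the psABI; the typedef name here is
illustrative):

// va_arg(l, int) only ever touches gp_offset and reg_save_area, which is
// why the XMM half of the register save area is dead whenever every va_arg
// in the function names an integer type.
typedef struct {
  unsigned int gp_offset;    // next GP register slot, 0..48
  unsigned int fp_offset;    // next XMM register slot, 48..176
  void *overflow_arg_area;   // arguments that were passed on the stack
  void *reg_save_area;       // 48 bytes of GPRs, then 128 bytes of XMMs
} va_list_sketch[1];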
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index d4545a6..efc0cd8 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1103,57 +1103,6 @@ be folded into: shl [mem], 1 //===---------------------------------------------------------------------===// -This testcase misses a read/modify/write opportunity (from PR1425): - -void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){ - int i; - for(i=0; i<width; i++) - b1[i] += (1*(b0[i] + b2[i])+0)>>0; -} - -We compile it down to: - -LBB1_2: # bb - movl (%esi,%edi,4), %ebx - addl (%ecx,%edi,4), %ebx - addl (%edx,%edi,4), %ebx - movl %ebx, (%ecx,%edi,4) - incl %edi - cmpl %eax, %edi - jne LBB1_2 # bb - -the inner loop should add to the memory location (%ecx,%edi,4), saving -a mov. Something like: - - movl (%esi,%edi,4), %ebx - addl (%edx,%edi,4), %ebx - addl %ebx, (%ecx,%edi,4) - -Here is another interesting example: - -void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){ - int i; - for(i=0; i<width; i++) - b1[i] -= (1*(b0[i] + b2[i])+0)>>0; -} - -We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]: - -LBB9_2: # bb - movl (%ecx,%edi,4), %ebx - subl (%esi,%edi,4), %ebx - subl (%edx,%edi,4), %ebx - movl %ebx, (%ecx,%edi,4) - incl %edi - cmpl %eax, %edi - jne LBB9_2 # bb - -Additionally, LSR should rewrite the exit condition of these loops to use -a stride-4 IV, would would allow all the scales in the loop to go away. -This would result in smaller code and more efficient microops. - -//===---------------------------------------------------------------------===// - In SSE mode, we turn abs and neg into a load from the constant pool plus a xor or and instruction, for example: @@ -1301,15 +1250,8 @@ FirstOnet: xorl %eax, %eax ret -There are a few possible improvements here: -1. We should be able to eliminate the dead load into %ecx -2. We could change the "movl 8(%esp), %eax" into - "movzwl 10(%esp), %eax"; this lets us change the cmpl - into a testl, which is shorter, and eliminate the shift. - -We could also in theory eliminate the branch by using a conditional -for the address of the load, but that seems unlikely to be worthwhile -in general. +We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this +lets us change the cmpl into a testl, which is shorter, and eliminate the shift. //===---------------------------------------------------------------------===// @@ -1331,22 +1273,23 @@ bb7: ; preds = %entry to: -_foo: +foo: # @foo +# BB#0: # %entry + movl 4(%esp), %ecx cmpb $0, 16(%esp) - movl 12(%esp), %ecx + je .LBB0_2 +# BB#1: # %bb movl 8(%esp), %eax - movl 4(%esp), %edx - je LBB1_2 # bb7 -LBB1_1: # bb - addl %edx, %eax + addl %ecx, %eax ret -LBB1_2: # bb7 - movl %edx, %eax - subl %ecx, %eax +.LBB0_2: # %bb7 + movl 12(%esp), %edx + movl %ecx, %eax + subl %edx, %eax ret -The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2 -if it commuted the addl in LBB1_1. +There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a +couple more movls by putting 4(%esp) into %eax instead of %ecx. //===---------------------------------------------------------------------===// @@ -1396,8 +1339,7 @@ Also check why xmm7 is not used at all in the function. //===---------------------------------------------------------------------===// -Legalize loses track of the fact that bools are always zero extended when in -memory. 
This causes us to compile abort_gzip (from 164.gzip) from: +Take the following: target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" @@ -1416,16 +1358,15 @@ bb4.i: ; preds = %entry } declare void @exit(i32) noreturn nounwind -into: - -_abort_gzip: +This compiles into: +_abort_gzip: ## @abort_gzip +## BB#0: ## %entry subl $12, %esp movb _in_exit.4870.b, %al - notb %al - testb $1, %al - jne LBB1_2 ## bb4.i -LBB1_1: ## bb.i - ... + cmpb $1, %al + jne LBB0_2 + +We somehow miss folding the movb into the cmpb. //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 22e89a5..677781d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -35,6 +35,10 @@ class formatted_raw_ostream; FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); +/// createGlobalBaseRegPass - This pass initializes a global base +/// register for PIC on x86-32. +FunctionPass* createGlobalBaseRegPass(); + /// createX86FloatingPointStackifierPass - This function returns a pass which /// converts floating point register references and pseudo instructions into /// floating point stack references and physical instructions. diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 151087f..2cf65c1 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -23,13 +23,13 @@ #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; -namespace { static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: assert(0 && "invalid fixup kind!"); case X86::reloc_pcrel_1byte: case FK_Data_1: return 0; + case X86::reloc_pcrel_2byte: case FK_Data_2: return 1; case X86::reloc_pcrel_4byte: case X86::reloc_riprel_4byte: @@ -39,6 +39,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { } } +namespace { class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) @@ -60,6 +61,7 @@ public: bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; +} // end anonymous namespace static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { @@ -75,7 +77,6 @@ static unsigned getRelaxedOpcode(unsigned Op) { case X86::JG_1: return X86::JG_4; case X86::JLE_1: return X86::JLE_4; case X86::JL_1: return X86::JL_4; - case X86::TAILJMP_1: case X86::JMP_1: return X86::JMP_4; case X86::JNE_1: return X86::JNE_4; case X86::JNO_1: return X86::JNO_4; @@ -180,6 +181,7 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { /* *** */ +namespace { class ELFX86AsmBackend : public X86AsmBackend { public: ELFX86AsmBackend(const Target &T) @@ -281,7 +283,7 @@ public: } }; -} +} // end anonymous namespace TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const std::string &TT) { diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a5774e1..a6a1e4e 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -42,7 +42,7 @@ def RetCC_X86Common : CallingConv<[ // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>, + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). 
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[ // returned in RAX. This disagrees with ABI documentation but is bug // compatible with gcc. CCIfType<[v1i64], CCAssignToReg<[RAX]>>, - CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; @@ -155,7 +155,7 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32, v2f32], + CCIfType<[v8i8, v4i16, v2i32], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasSSE2()", CCPromoteToType<v2i64>>>>, @@ -177,7 +177,7 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>> + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> ]>; // Calling convention used on Win64 @@ -195,7 +195,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. @@ -254,7 +254,7 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // registers if the call is not a vararg call. - CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32, v2f32], + CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[MM0, MM1, MM2]>>>, // Integer/Float values get stored in stack slots that are 4 bytes in diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 8f02604..f13669b 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -138,7 +138,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { // MOVPC32r is basically a call plus a pop instruction. if (Desc.getOpcode() == X86::MOVPC32r) emitInstruction(*I, &II->get(X86::POP32r)); - NumEmitted++; // Keep track of the # of mi's emitted + ++NumEmitted; // Keep track of the # of mi's emitted } } } while (MCE.finishFunction(MF)); @@ -730,9 +730,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRMDestMem: { MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, - getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands) + getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) .getReg())); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), X86II::getSizeOfImm(Desc->TSFlags)); @@ -750,13 +750,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; case X86II::MRMSrcMem: { - // FIXME: Maybe lea should have its own form? - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; + int AddrOperands = X86::AddrNumOperands; intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? 
X86II::getSizeOfImm(Desc->TSFlags) : 0; @@ -810,14 +804,14 @@ case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: { - intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ? - (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ? + intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ? + (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m, PCAdj); - CurOp += X86AddrNumOperands; + CurOp += X86::AddrNumOperands; if (CurOp == NumOps) break; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 1bc5eb7..cdde24a 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -23,7 +23,9 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -52,20 +54,7 @@ class X86FastISel : public FastISel { bool X86ScalarSSEf32; public: - explicit X86FastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am, - std::vector<std::pair<MachineInstr*, unsigned> > &pn -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> &cil -#endif - ) - : FastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ) { + explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { Subtarget = &TM.getSubtarget<X86Subtarget>(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); @@ -96,6 +85,8 @@ private: bool X86SelectStore(const Instruction *I); + bool X86SelectRet(const Instruction *I); + bool X86SelectCmp(const Instruction *I); bool X86SelectZExt(const Instruction *I); @@ -117,6 +108,7 @@ private: bool X86SelectCall(const Instruction *I); CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); + CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false); const X86InstrInfo *getInstrInfo() const { return getTargetMachine()->getInstrInfo(); @@ -190,6 +182,20 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, return CC_X86_32_C; } +/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling +/// convention. +CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC, + bool isTailCall) { + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetWin64()) + return RetCC_X86_Win64_C; + else + return RetCC_X86_64_C; + } + + return RetCC_X86_32_C; +} + /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. @@ -242,7 +248,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, } ResultReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), ResultReg), AM); return true; } @@ -261,7 +268,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, case MVT::i1: { // Mask out all but lowest bit.
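// (Only bit 0 of an i1 is defined; the AND8ri below computes Val &= 1 so
// stray high bits in the 8-bit register never reach memory.)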
unsigned AndResult = createResultReg(X86::GR8RegisterClass); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); Val = AndResult; } @@ -278,7 +285,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, break; } - addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc)), AM).addReg(Val); return true; } @@ -306,7 +314,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, } if (Opc) { - addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc)), AM) .addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); return true; @@ -342,6 +351,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { const User *U = NULL; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Don't walk into other basic blocks; it's possible we haven't + // visited them yet, so the instructions may not yet be assigned + // virtual registers. + if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) + return false; + Opcode = I->getOpcode(); U = I; } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { @@ -349,6 +364,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { U = C; } + if (const PointerType *Ty = dyn_cast<PointerType>(V->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + switch (Opcode) { default: break; case Instruction::BitCast: @@ -370,8 +391,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { case Instruction::Alloca: { // Do static allocas. const AllocaInst *A = cast<AllocaInst>(V); - DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A); - if (SI != StaticAllocaMap.end()) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(A); + if (SI != FuncInfo.StaticAllocaMap.end()) { AM.BaseType = X86AddressMode::FrameIndexBase; AM.Base.FrameIndex = SI->second; return true; @@ -411,20 +433,33 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { Disp += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { - // Constant-offset addressing. - Disp += CI->getSExtValue() * S; - } else if (IndexReg == 0 && - (!AM.GV || !Subtarget->isPICStyleRIPRel()) && - (S == 1 || S == 2 || S == 4 || S == 8)) { - // Scaled-index addressing. - Scale = S; - IndexReg = getRegForGEPIndex(Op).first; - if (IndexReg == 0) - return false; - } else - // Unsupported. - goto unsupported_gep; + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(Op); + do { + Op = Worklist.pop_back_val(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. + Disp += CI->getSExtValue() * S; + } else if (isa<AddOperator>(Op) && + isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { + // An add with a constant operand. Fold the constant. + ConstantInt *CI = + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + Disp += CI->getSExtValue() * S; + // Add the other operand back to the work list. 
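// (For a GEP index of the form (add i64 %i, 7) with element size S, the
// constant contributes 7*S to Disp here, and %i goes back on the worklist
// so it can still become the scaled index register.)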
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0)); + } else if (IndexReg == 0 && + (!AM.GV || !Subtarget->isPICStyleRIPRel()) && + (S == 1 || S == 2 || S == 4 || S == 8)) { + // Scaled-index addressing. + Scale = S; + IndexReg = getRegForGEPIndex(Op).first; + if (IndexReg == 0) + return false; + } else + // Unsupported. + goto unsupported_gep; + } while (!Worklist.empty()); } } // Check for displacement overflow. @@ -473,7 +508,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // If this reference is relative to the pic base, set it now. if (isGlobalRelativeToPICBase(GVFlags)) { // FIXME: How do we know Base.Reg is free?? - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } // Unless the ABI requires an extra load, return a direct reference to @@ -504,6 +539,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { StubAM.GV = GV; StubAM.GVOpFlags = GVFlags; + // Prepare for inserting code in the local-value area. + MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; @@ -516,8 +554,13 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { } LoadReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM); - + MachineInstrBuilder LoadMI = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); + addFullAddress(LoadMI, StubAM); + + // Ok, back to normal mode. + leaveLocalValueArea(SaveInsertPt); + // Prevent loading GV stub multiple times in same MBB. LocalValueMap[V] = LoadReg; } @@ -642,6 +685,93 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { return X86FastEmitStore(VT, I->getOperand(0), AM); } +/// X86SelectRet - Select and emit code to implement ret instructions. +bool X86FastISel::X86SelectRet(const Instruction *I) { + const ReturnInst *Ret = cast<ReturnInst>(I); + const Function &F = *I->getParent()->getParent(); + + if (!FuncInfo.CanLowerReturn) + return false; + + CallingConv::ID CC = F.getCallingConv(); + if (CC != CallingConv::C && + CC != CallingConv::Fast && + CC != CallingConv::X86_FastCall) + return false; + + if (Subtarget->isTargetWin64()) + return false; + + // Don't handle popping bytes on return for now. + if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>() + ->getBytesToPopOnReturn() != 0) + return 0; + + // fastcc with -tailcallopt is intended to provide a guaranteed + // tail call optimization. Fastisel doesn't know how to do that. + if (CC == CallingConv::Fast && GuaranteedTailCallOpt) + return false; + + // Let SDISel handle vararg functions. + if (F.isVarArg()) + return false; + + if (Ret->getNumOperands() > 0) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ValLocs; + CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); + CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC)); + + const Value *RV = Ret->getOperand(0); + unsigned Reg = getRegForValue(RV); + if (Reg == 0) + return false; + + // Only handle a single return value for now. + if (ValLocs.size() != 1) + return false; + + CCValAssign &VA = ValLocs[0]; + + // Don't bother handling odd stuff for now. + if (VA.getLocInfo() != CCValAssign::Full) + return false; + // Only handle register returns for now. 
+ if (!VA.isRegLoc()) + return false; + // TODO: For now, don't try to handle cases where getLocInfo() + // says Full but the types don't match. + if (VA.getValVT() != TLI.getValueType(RV->getType())) + return false; + + // The calling-convention tables for x87 returns don't tell + // the whole story. + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) + return false; + + // Make the copy. + unsigned SrcReg = Reg + VA.getValNo(); + unsigned DstReg = VA.getLocReg(); + const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); + // Avoid a cross-class copy. This is very unlikely. + if (!SrcRC->contains(DstReg)) + return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + DstReg).addReg(SrcReg); + + // Mark the register as live out of the function. + MRI.addLiveOut(VA.getLocReg()); + } + + // Now emit the RET. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + return true; +} + /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { @@ -661,15 +791,15 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { return false; } -static unsigned X86ChooseCmpOpcode(EVT VT) { +static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; case MVT::i32: return X86::CMP32rr; case MVT::i64: return X86::CMP64rr; - case MVT::f32: return X86::UCOMISSrr; - case MVT::f64: return X86::UCOMISDrr; + case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0; + case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0; } } @@ -706,18 +836,21 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, // CMPri, otherwise use CMPrr. 
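// (Comparing against a ConstantInt directly avoids materializing the
// immediate: cmp $42, %eax instead of mov $42, %ecx; cmp %ecx, %eax.)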
if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { - BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) - .addImm(Op1C->getSExtValue()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc)) + .addReg(Op0Reg) + .addImm(Op1C->getSExtValue()); return true; } } - unsigned CompareOpc = X86ChooseCmpOpcode(VT); + unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); if (CompareOpc == 0) return false; unsigned Op1Reg = getRegForValue(Op1); if (Op1Reg == 0) return false; - BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc)) + .addReg(Op0Reg) + .addReg(Op1Reg); return true; } @@ -739,9 +872,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned EReg = createResultReg(&X86::GR8RegClass); unsigned NPReg = createResultReg(&X86::GR8RegClass); - BuildMI(MBB, DL, TII.get(X86::SETEr), EReg); - BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETNPr), NPReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); UpdateValueMap(I, ResultReg); return true; @@ -752,9 +886,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned NEReg = createResultReg(&X86::GR8RegClass); unsigned PReg = createResultReg(&X86::GR8RegClass); - BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg); - BuildMI(MBB, DL, TII.get(X86::SETPr), PReg); - BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETNEr), NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETPr), PReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::OR8rr), ResultReg) + .addReg(PReg).addReg(NEReg); UpdateValueMap(I, ResultReg); return true; } @@ -793,7 +931,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg); UpdateValueMap(I, ResultReg); return true; } @@ -819,8 +957,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. const BranchInst *BI = cast<BranchInst>(I); - MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; - MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; + MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // Fold the common case of a conditional branch with a comparison. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { @@ -829,7 +967,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Try to take advantage of fallthrough opportunities. 
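// (If the true destination is the layout successor, swapping the successors
// and inverting the predicate turns "jcc true; jmp false" into one inverted
// jcc that falls through to the true block.)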
CmpInst::Predicate Predicate = CI->getPredicate(); - if (MBB->isLayoutSuccessor(TrueMBB)) { + if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); } @@ -878,16 +1016,18 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc)) + .addMBB(TrueMBB); if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4)) + .addMBB(TrueMBB); } - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } } else if (ExtractValueInst *EI = @@ -910,10 +1050,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { const MachineInstr *SetMI = 0; - unsigned Reg = lookUpRegForValue(EI); + unsigned Reg = getRegForValue(EI); for (MachineBasicBlock::const_reverse_iterator - RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { + RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend(); + RI != RE; ++RI) { const MachineInstr &MI = *RI; if (MI.definesRegister(Reg)) { @@ -938,11 +1079,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? - X86::JO_4 : X86::JB_4)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } } @@ -954,10 +1095,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { unsigned OpReg = getRegForValue(BI->getCondition()); if (OpReg == 0) return false; - BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) + .addReg(OpReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4)) + .addMBB(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } @@ -1014,7 +1157,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { // Fold immediate in shl(x,3). if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); - BuildMI(MBB, DL, TII.get(OpImm), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); UpdateValueMap(I, ResultReg); return true; @@ -1022,17 +1165,19 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CReg).addReg(Op1Reg); // The shift instruction uses X86::CL. 
If we defined a super-register - // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what - // we're doing here. + // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. if (CReg != X86::CL) - BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) - .addReg(CReg).addImm(X86::sub_8bit); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::KILL), X86::CL) + .addReg(CReg, RegState::Kill); unsigned ResultReg = createResultReg(RC); - BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg) + .addReg(Op0Reg); UpdateValueMap(I, ResultReg); return true; } @@ -1064,9 +1209,11 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { unsigned Op2Reg = getRegForValue(I->getOperand(2)); if (Op2Reg == 0) return false; - BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) + .addReg(Op0Reg).addReg(Op0Reg); unsigned ResultReg = createResultReg(RC); - BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + .addReg(Op1Reg).addReg(Op2Reg); UpdateValueMap(I, ResultReg); return true; } @@ -1080,7 +1227,9 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR64RegisterClass); - BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::CVTSS2SDrr), ResultReg) + .addReg(OpReg); UpdateValueMap(I, ResultReg); return true; } @@ -1097,7 +1246,9 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR32RegisterClass); - BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::CVTSD2SSrr), ResultReg) + .addReg(OpReg); UpdateValueMap(I, ResultReg); return true; } @@ -1132,7 +1283,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; unsigned CopyReg = createResultReg(CopyRC); - BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg) + .addReg(InputReg); // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, @@ -1153,14 +1305,18 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) { switch (CI->getIntrinsicID()) { default: break; case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: + case Intrinsic::uadd_with_overflow: { // Cheat a little. We know that the registers for "add" and "seto" are // allocated sequentially. However, we only keep track of the register // for "add" in the value map. Use extractvalue's index to get the // correct register for "seto". - UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin()); + unsigned OpReg = getRegForValue(Agg); + if (OpReg == 0) + return false; + UpdateValueMap(I, OpReg + *EI->idx_begin()); return true; } + } } return false; @@ -1174,8 +1330,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Emit code inline code to store the stack guard onto the stack. 
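// (llvm.stackprotector carries two arguments: ArgOperand(0) is the guard
// value and ArgOperand(1) is the alloca slot it must be stored into; the
// X86FastEmitStore call below performs that store directly.)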
EVT PtrTy = TLI.getPointerTy(); - const Value *Op1 = I.getOperand(1); // The guard's value. - const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + const Value *Op1 = I.getArgOperand(0); // The guard's value. + const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); // Grab the frame index. X86AddressMode AM; @@ -1186,7 +1342,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return true; } case Intrinsic::objectsize: { - ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2)); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); const Type *Ty = I.getCalledFunction()->getReturnType(); assert(CI && "Non-constant type in Intrinsic::objectsize?"); @@ -1204,8 +1360,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(OpC), ResultReg). - addImm(CI->getZExtValue() == 0 ? -1ULL : 0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg). + addImm(CI->isZero() ? -1ULL : 0); UpdateValueMap(&I, ResultReg); return true; } @@ -1218,12 +1374,12 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. - addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). - addMetadata(DI->getVariable()); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). + addImm(0).addMetadata(DI->getVariable()); return true; } case Intrinsic::trap: { - BuildMI(MBB, DL, TII.get(X86::TRAP)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP)); return true; } case Intrinsic::sadd_with_overflow: @@ -1241,8 +1397,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { if (!isTypeLegal(RetTy, VT)) return false; - const Value *Op1 = I.getOperand(1); - const Value *Op2 = I.getOperand(2); + const Value *Op1 = I.getArgOperand(0); + const Value *Op2 = I.getArgOperand(1); unsigned Reg1 = getRegForValue(Op1); unsigned Reg2 = getRegForValue(Op2); @@ -1259,7 +1415,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg) + .addReg(Reg1).addReg(Reg2); unsigned DestReg1 = UpdateValueMap(&I, ResultReg); // If the add with overflow is an intra-block value then we just want to @@ -1277,7 +1434,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { unsigned Opc = X86::SETBr; if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) Opc = X86::SETOr; - BuildMI(MBB, DL, TII.get(Opc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); return true; } } @@ -1285,7 +1442,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { bool X86FastISel::X86SelectCall(const Instruction *I) { const CallInst *CI = cast<CallInst>(I); - const Value *Callee = I->getOperand(0); + const Value *Callee = CI->getCalledValue(); // Can't handle inline asm yet. if (isa<InlineAsm>(Callee)) @@ -1314,6 +1471,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (FTy->isVarArg()) return false; + // Fast-isel doesn't know about callee-pop yet. + if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) + return false; + // Handle *simple* calls for now. 
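// (The IsCalleePop bail-out above covers conventions like x86-32 stdcall,
// where the callee removes its own argument area with "ret $imm"; the
// CALLSEQ bookkeeping below assumes the caller readjusts the stack.)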
const Type *RetTy = CS.getType(); EVT RetVT; @@ -1387,6 +1548,12 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); + + // Allocate shadow area for Win64 + if (Subtarget->isTargetWin64()) { + CCInfo.AllocateStack(32, 8); + } + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. @@ -1394,7 +1561,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); - BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) + .addImm(NumBytes); // Process argument: walk the register/memloc assignments, inserting // copies / loads. @@ -1449,11 +1617,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } if (VA.isRegLoc()) { - TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), - Arg, RC, RC, DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + VA.getLocReg()).addReg(Arg); RegArgs.push_back(VA.getLocReg()); } else { unsigned LocMemOffset = VA.getLocMemOffset(); @@ -1475,12 +1640,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (Subtarget->isPICStyleGOT()) { - TargetRegisterClass *RC = X86::GR32RegisterClass; - unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC, - DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + X86::EBX).addReg(Base); } // Issue the call. @@ -1488,7 +1650,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (CalleeOp) { // Register-indirect call. unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; - MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) + .addReg(CalleeOp); } else { // Direct call. @@ -1517,7 +1680,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } - MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) + .addGlobalAddress(GV, 0, OpFlags); } // Add an implicit use GOT pointer in EBX. @@ -1530,9 +1694,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Issue CALLSEQ_END unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); - BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) + .addImm(NumBytes).addImm(0); // Now handle call return value (if any). 
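The AllocateStack(32, 8) call above reserves the Win64 shadow area: four 8-byte home slots the caller must provide for the register arguments RCX, RDX, R8 and R9, even when fewer than four arguments are passed. A back-of-the-envelope sketch of the resulting outgoing-argument size (hypothetical helper, integer-sized arguments assumed):

    #include <cstdio>

    // Win64 always reserves 32 bytes of "home" space for the four
    // register arguments; arguments past the fourth go on the stack.
    static unsigned win64OutgoingBytes(unsigned NumIntArgs) {
      const unsigned ShadowArea = 4 * 8;
      unsigned StackArgs = NumIntArgs > 4 ? (NumIntArgs - 4) * 8 : 0;
      return ShadowArea + StackArgs;
    }

    int main() {
      std::printf("%u %u\n", win64OutgoingBytes(2),  // 32
                  win64OutgoingBytes(6));            // 48
    }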
+ SmallVector<unsigned, 4> UsedRegs; if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext()); @@ -1542,7 +1708,6 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { assert(RVLocs.size() == 1 && "Can't handle multi-value calls!"); EVT CopyVT = RVLocs[0].getValVT(); TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); - TargetRegisterClass *SrcRC = DstRC; // If this is a call to a function that returns an fp value on the x87 fp // stack, but where we prefer to use the value in xmm registers, copy it @@ -1551,15 +1716,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { RVLocs[0].getLocReg() == X86::ST1) && isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) { CopyVT = MVT::f80; - SrcRC = X86::RSTRegisterClass; DstRC = X86::RFP80RegisterClass; } unsigned ResultReg = createResultReg(DstRC); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - RVLocs[0].getLocReg(), DstRC, SrcRC, DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(RVLocs[0].getLocReg()); + UsedRegs.push_back(RVLocs[0].getLocReg()); + if (CopyVT != RVLocs[0].getValVT()) { // Round the F80 the right size, which also moves to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and @@ -1568,18 +1732,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; int FI = MFI.CreateStackObject(MemSize, MemSize, false); - addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg); + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc)), FI) + .addReg(ResultReg); DstRC = ResVT == MVT::f32 ? X86::FR32RegisterClass : X86::FR64RegisterClass; Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm; ResultReg = createResultReg(DstRC); - addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI); + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), FI); } if (AndToI1) { // Mask out all but lowest bit for some call which produces an i1. unsigned AndResult = createResultReg(X86::GR8RegisterClass); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1); ResultReg = AndResult; } @@ -1587,6 +1754,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { UpdateValueMap(I, ResultReg); } + // Set all unused physreg defs as dead. 
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + return true; } @@ -1599,6 +1769,8 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { return X86SelectLoad(I); case Instruction::Store: return X86SelectStore(I); + case Instruction::Ret: + return X86SelectRet(I); case Instruction::ICmp: case Instruction::FCmp: return X86SelectCmp(I); @@ -1699,7 +1871,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { else Opc = X86::LEA64r; unsigned ResultReg = createResultReg(RC); - addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), AM); return ResultReg; } return 0; @@ -1717,10 +1890,10 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { unsigned char OpFlag = 0; if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic OpFlag = X86II::MO_PIC_BASE_OFFSET; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } else if (Subtarget->isPICStyleGOT()) { OpFlag = X86II::MO_GOTOFF; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } else if (Subtarget->isPICStyleRIPRel() && TM.getCodeModel() == CodeModel::Small) { PICBase = X86::RIP; @@ -1729,7 +1902,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { // Create the load from the constant pool. unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align); unsigned ResultReg = createResultReg(RC); - addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), + addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), MCPOffset, PICBase, OpFlag); return ResultReg; @@ -1743,7 +1917,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { // various places, but TargetMaterializeAlloca also needs a check // in order to avoid recursion between getRegForValue, // X86SelectAddress, and TargetMaterializeAlloca. - if (!StaticAllocaMap.count(C)) + if (!FuncInfo.StaticAllocaMap.count(C)) return 0; X86AddressMode AM; @@ -1752,24 +1926,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); unsigned ResultReg = createResultReg(RC); - addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), AM); return ResultReg; } namespace llvm { - llvm::FastISel *X86::createFastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am, - std::vector<std::pair<MachineInstr*, unsigned> > &pn -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> &cil -#endif - ) { - return new X86FastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ); + llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { + return new X86FastISel(funcInfo); } } diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index a8117d4..96e0aae 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -17,6 +17,7 @@ namespace X86 { enum Fixups { reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_pcrel_2byte, // 16-bit pcrel, e.g.
callw reloc_riprel_4byte, // 32-bit rip-relative reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq }; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 93460ef..cee4ad7 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -133,7 +133,7 @@ namespace { // Emit an fxch to update the runtime processor's version of the state. BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); - NumFXCH++; + ++NumFXCH; } void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { @@ -164,6 +164,8 @@ namespace { void handleCompareFP(MachineBasicBlock::iterator &I); void handleCondMovFP(MachineBasicBlock::iterator &I); void handleSpecialFP(MachineBasicBlock::iterator &I); + + bool translateCopy(MachineInstr*); }; char FPS::ID = 0; } @@ -232,12 +234,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { MachineInstr *MI = I; - unsigned Flags = MI->getDesc().TSFlags; + uint64_t Flags = MI->getDesc().TSFlags; unsigned FPInstClass = Flags & X86II::FPTypeMask; if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; - + + if (MI->isCopy() && translateCopy(MI)) + FPInstClass = X86II::SpecialFP; + if (FPInstClass == X86II::NotFP) continue; // Efficiently ignore non-fp insts! @@ -628,7 +633,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; unsigned NumOps = MI->getDesc().getNumOperands(); - assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) && + assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && "Can only handle fst* & ftst instructions!"); // Is this the last use of the source register? @@ -1001,15 +1006,17 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { case X86::FpSET_ST0_32: case X86::FpSET_ST0_64: case X86::FpSET_ST0_80: { + // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm + // arguments that use an st constraint. We expect a sequence of + // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM unsigned Op0 = getFPReg(MI->getOperand(0)); - // FpSET_ST0_80 is generated by copyRegToReg for both function return - // and inline assembly with the "st" constrain. In the latter case, - // it is possible for ST(0) to be alive after this instruction. if (!MI->killsRegister(X86::FP0 + Op0)) { - // Duplicate Op0 - duplicateToTop(0, 7 /*temp register*/, I); + // Duplicate Op0 into a temporary on the stack top. + // This actually assumes that FP7 is dead. + duplicateToTop(Op0, 7, I); } else { + // Op0 is killed, so just swap it into position. moveToTop(Op0, I); } --StackTop; // "Forget" we have something on the top of stack! break; } case X86::FpSET_ST1_32: case X86::FpSET_ST1_64: - case X86::FpSET_ST1_80: - // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. - if (StackTop == 1) { - BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); - NumFXCH++; - StackTop = 0; - break; + case X86::FpSET_ST1_80: { + // Set up st(1) for inline asm. We are assuming that st(0) has already been + // set up by FpSET_ST0, and our StackTop is off by one because of it. + unsigned Op0 = getFPReg(MI->getOperand(0)); + // Restore the actual StackTop from before Fp_SET_ST0. + // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we + // are not enforcing the constraint.
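The StackTop bookkeeping in these FpSET handlers is easier to follow with the stackifier's model in mind: it maps x87 stack slots to virtual FP registers, moveToTop is realized as a single fxch against st(0), and handing a value off "forgets" st(0). A toy model of just that state, under the simplifying assumption that one swap stands for one FXCH:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Toy FP-stackifier state: the vector holds virtual FP register
    // numbers, with back() playing the role of st(0).
    struct FPStackModel {
      std::vector<unsigned> Stack;

      void moveToTop(unsigned Reg) {        // one FXCH st(i)
        auto It = std::find(Stack.begin(), Stack.end(), Reg);
        if (It != Stack.end() && &*It != &Stack.back())
          std::swap(*It, Stack.back());
      }
      void popTop() { Stack.pop_back(); }   // "forget" st(0)
    };

    int main() {
      FPStackModel M{{2, 0, 1}}; // st(0)=FP1, st(1)=FP0, st(2)=FP2
      M.moveToTop(0);            // FP0 into st(0), as FpSET_ST0 requires
      M.popTop();                // hand it off; the --StackTop above
      std::printf("depth=%zu st0=FP%u\n", M.Stack.size(), M.Stack.back());
    }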
+ ++StackTop; + unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). + if (!MI->killsRegister(X86::FP0 + Op0)) { + // Assume FP6 is not live, use it as a scratch register. + duplicateToTop(Op0, 6, I); + moveToTop(RegOnTop, I); + } else if (getSTReg(Op0) != X86::ST1) { + // We have the wrong value at st(1). Shuffle! Untested! + moveToTop(getStackEntry(1), I); + moveToTop(Op0, I); + moveToTop(RegOnTop, I); } - assert(StackTop == 2 && "Stack should have two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! + assert(StackTop >= 2 && "Too few live registers"); + StackTop -= 2; // "Forget" both st(0) and st(1). break; + } case X86::MOV_Fp3232: case X86::MOV_Fp3264: case X86::MOV_Fp6432: @@ -1041,32 +1060,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { unsigned SrcReg = getFPReg(MO1); const MachineOperand &MO0 = MI->getOperand(0); - // These can be created due to inline asm. Two address pass can introduce - // copies from RFP registers to virtual registers. - if (MO0.getReg() == X86::ST0 && SrcReg == 0) { - assert(MO1.isKill()); - // Treat %ST0<def> = MOV_Fp8080 %FP0<kill> - // like FpSET_ST0_80 %FP0<kill>, %ST0<imp-def> - assert((StackTop == 1 || StackTop == 2) - && "Stack should have one or two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! - break; - } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) { - assert(MO1.isKill()); - // Treat %ST1<def> = MOV_Fp8080 %FP1<kill> - // like FpSET_ST1_80 %FP0<kill>, %ST1<imp-def> - // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. - if (StackTop == 1) { - BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); - NumFXCH++; - StackTop = 0; - break; - } - assert(StackTop == 2 && "Stack should have two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! - break; - } - unsigned DestReg = getFPReg(MO0); if (MI->killsRegister(X86::FP0+SrcReg)) { // If the input operand is killed, we can just change the owner of the @@ -1206,3 +1199,33 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { I = MBB->erase(I); // Remove the pseudo instruction --I; } + +// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. +bool FPS::translateCopy(MachineInstr *MI) { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + if (DstReg == X86::ST0) { + MI->setDesc(TII->get(X86::FpSET_ST0_80)); + MI->RemoveOperand(0); + return true; + } + if (DstReg == X86::ST1) { + MI->setDesc(TII->get(X86::FpSET_ST1_80)); + MI->RemoveOperand(0); + return true; + } + if (SrcReg == X86::ST0) { + MI->setDesc(TII->get(X86::FpGET_ST0_80)); + return true; + } + if (SrcReg == X86::ST1) { + MI->setDesc(TII->get(X86::FpGET_ST1_80)); + return true; + } + if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { + MI->setDesc(TII->get(X86::MOV_Fp8080)); + return true; + } + return false; +} diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 747683d..2c98b96 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -72,18 +72,15 @@ static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { /// stack code, and thus needs an FP_REG_KILL. static bool ContainsFPStackCode(MachineBasicBlock *MBB, const MachineRegisterInfo &MRI) { - // Scan the block, looking for instructions that define fp stack vregs. 
+ // Scan the block, looking for instructions that define or use fp stack vregs. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getNumOperands() == 0 || !I->getOperand(0).isReg()) - continue; - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef()) + if (!I->getOperand(op).isReg()) continue; - - if (isFPStackVReg(I->getOperand(op).getReg(), MRI)) - return true; + if (unsigned Reg = I->getOperand(op).getReg()) + if (isFPStackVReg(Reg, MRI)) + return true; } } @@ -108,8 +105,8 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB, bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // If we are emitting FP stack code, scan the basic block to determine if this - // block defines any FP values. If so, put an FP_REG_KILL instruction before - // the terminator of the block. + // block defines or uses any FP values. If so, put an FP_REG_KILL instruction + // before the terminator of the block. // Note that FP stack instructions are used in all modes for long double, // so we always need to do this check. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 0f64383..72f2bc1 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -137,21 +137,6 @@ namespace { } namespace { - class X86ISelListener : public SelectionDAG::DAGUpdateListener { - SmallSet<SDNode*, 4> Deletes; - public: - explicit X86ISelListener() {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { - Deletes.insert(N); - } - virtual void NodeUpdated(SDNode *N) { - // Ignore updates. - } - bool IsDeleted(SDNode *N) { - return Deletes.count(N); - } - }; - //===--------------------------------------------------------------------===// /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. @@ -199,16 +184,17 @@ namespace { bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, - X86ISelListener &DeadNodes, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, SDValue &Disp); + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, SDValue &Disp); + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -239,7 +225,8 @@ namespace { // These are 32-bit even in 64-bit mode since RIP relative offset // is 32-bit. 
if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, + Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(), + MVT::i32, AM.Disp, AM.SymbolFlags); else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, @@ -386,14 +373,14 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, } for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) Ops.push_back(OrigChain.getOperand(i)); - CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size()); - CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), + CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size()); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -515,7 +502,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { N->getOperand(0), MemTmp, NULL, 0, MemVT, false, false, 0); - SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, + SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp, NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the @@ -664,8 +651,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - X86ISelListener DeadNodes; - if (MatchAddressRecursively(N, AM, DeadNodes, 0)) + if (MatchAddressRecursively(N, AM, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -713,7 +699,6 @@ static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { } bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, - X86ISelListener &DeadNodes, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); @@ -876,13 +861,13 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // other uses, since it avoids a two-address sub instruction, however // it costs an additional mov if the index register has other uses. + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1) || - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - DeadNodes.IsDeleted(N.getNode())) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { AM = Backup; break; } @@ -893,7 +878,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } int Cost = 0; - SDValue RHS = N.getNode()->getOperand(1); + SDValue RHS = Handle.getValue().getNode()->getOperand(1); // If the RHS involves a register with multiple uses, this // transformation incurs an extra mov, due to the neg instruction // clobbering its operand. 
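The HandleSDNode idiom above deserves a comment: the recursive match may CSE or delete nodes, and if N itself is replaced mid-recursion a raw SDValue would dangle; a HandleSDNode registers an artificial use that the replacement machinery retargets, so Handle.getValue() always names the surviving node. A generic analogue of the shape of the idiom (not the SelectionDAG API):

    #include <cstdio>
    #include <memory>

    struct Node { int Id; };

    // Analogue of HandleSDNode: a held "use" that replacement updates,
    // so the holder can re-read the surviving node afterwards.
    struct Handle {
      std::shared_ptr<Node> N;
      explicit Handle(std::shared_ptr<Node> InitialN) : N(std::move(InitialN)) {}
      Node *get() const { return N.get(); }
      void replaceAllUsesWith(std::shared_ptr<Node> NewN) { N = std::move(NewN); }
    };

    int main() {
      Handle H(std::make_shared<Node>(Node{1}));
      H.replaceAllUsesWith(std::make_shared<Node>(Node{2})); // e.g. a CSE fold
      std::printf("%d\n", H.get()->Id); // 2: the handle followed the update
    }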
@@ -944,35 +929,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } case ISD::ADD: { + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + SDValue LHS = Handle.getValue().getNode()->getOperand(0); + SDValue RHS = Handle.getValue().getNode()->getOperand(1); + X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1)) { - if (DeadNodes.IsDeleted(N.getNode())) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return true; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, - DeadNodes, Depth+1)) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return DeadNodes.IsDeleted(N.getNode()); - } + if (!MatchAddressRecursively(LHS, AM, Depth+1) && + !MatchAddressRecursively(RHS, AM, Depth+1)) + return false; + AM = Backup; + LHS = Handle.getValue().getNode()->getOperand(0); + RHS = Handle.getValue().getNode()->getOperand(1); // Try again after commuting the operands. + if (!MatchAddressRecursively(RHS, AM, Depth+1) && + !MatchAddressRecursively(LHS, AM, Depth+1)) + return false; AM = Backup; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, - DeadNodes, Depth+1)) { - if (DeadNodes.IsDeleted(N.getNode())) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return true; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1)) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return DeadNodes.IsDeleted(N.getNode()); - } - AM = Backup; + LHS = Handle.getValue().getNode()->getOperand(0); + RHS = Handle.getValue().getNode()->getOperand(1); // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least @@ -980,8 +957,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (AM.BaseType == X86ISelAddressMode::RegBase && !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base_Reg = N.getNode()->getOperand(0); - AM.IndexReg = N.getNode()->getOperand(1); + AM.Base_Reg = LHS; + AM.IndexReg = RHS; AM.Scale = 1; return false; } @@ -996,7 +973,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. 
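The rewritten ISD::ADD case above is a textbook backtracking match: fold LHS then RHS into the addressing mode, on failure restore the saved mode and retry commuted, and as a last resort hand both operands over as base and index registers. In outline, with a toy operand matcher standing in for the recursion (LLVM's convention of returning false on success kept):

    #include <cstdio>

    struct AddrMode { int Base = -1, Index = -1; long Disp = 0; };

    // Toy rule: values >= 100 act like constants foldable into Disp;
    // anything else must end up in a register. Returning false means
    // "matched", following the convention in the code above.
    static bool matchOperand(int Op, AddrMode &AM) {
      if (Op >= 100) { AM.Disp += Op; return false; }
      return true;
    }

    static bool matchAdd(int LHS, int RHS, AddrMode &AM) {
      AddrMode Backup = AM;
      if (!matchOperand(LHS, AM) && !matchOperand(RHS, AM))
        return false;                 // both folded
      AM = Backup;
      if (!matchOperand(RHS, AM) && !matchOperand(LHS, AM))
        return false;                 // folded after commuting
      AM = Backup;                    // last resort: plain base + index
      if (AM.Base < 0 && AM.Index < 0) {
        AM.Base = LHS; AM.Index = RHS;
        return false;
      }
      return true;                    // no match
    }

    int main() {
      AddrMode AM;
      matchAdd(100, 200, AM);         // both "constants" fold: Disp = 300
      std::printf("Base=%d Index=%d Disp=%ld\n", AM.Base, AM.Index, AM.Disp);
    }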
@@ -1073,7 +1050,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, CurDAG->RepositionNode(N.getNode(), Shl.getNode()); Shl.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes); + CurDAG->ReplaceAllUsesWith(N, Shl); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); return false; @@ -1124,7 +1101,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes); + CurDAG->ReplaceAllUsesWith(N, NewSHIFT); AM.Scale = 1 << ShiftCst; AM.IndexReg = NewAND; @@ -1230,7 +1207,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, /// mode it matches can be cost effectively emitted as an LEA instruction. bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, - SDValue &Index, SDValue &Disp) { + SDValue &Index, SDValue &Disp, + SDValue &Segment) { X86ISelAddressMode AM; // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support @@ -1284,7 +1262,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, if (Complexity <= 2) return false; - SDValue Segment; getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1292,10 +1269,10 @@ /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, - SDValue &Disp) { + SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); - + X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); @@ -1309,7 +1286,6 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, AM.IndexReg = CurDAG->getRegister(0, MVT::i64); } - SDValue Segment; getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1672,6 +1648,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); } + // Prevent use of AH in a REX instruction by referencing AX instead. + if (HiReg == X86::AH && Subtarget->is64Bit() && + !SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + // Get the low part if needed. Don't use getCopyFromReg for aliasing + // registers. + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + + // Shift AX down 8 bits. + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), 0); + // Then truncate it down to i8. + ReplaceUses(SDValue(Node, 1), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1682,24 +1678,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), 0); - // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, - MVT::i8, Result); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } @@ -1812,6 +1793,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); } + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + if (HiReg == X86::AH && Subtarget->is64Bit() && + !SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + + // If we also need AL (the quotient), get it by extracting a subreg from + // Result. The fast register allocator does not like multiple CopyFromReg + // nodes using aliasing registers. + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + + // Shift AX right by 8 bits instead of using AH. + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), + 0); + ReplaceUses(SDValue(Node, 1), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + } // Copy the division (low) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1822,25 +1826,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), - 0); - // Then truncate it down to i8. 
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, - MVT::i8, Result); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b02c33d..1a63474 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -62,21 +62,19 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { - switch (TM.getSubtarget<X86Subtarget>().TargetType) { - default: llvm_unreachable("unknown subtarget type"); - case X86Subtarget::isDarwin: - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - return new X8664_MachoTargetObjectFile(); + + bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); + + if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) { + if (is64Bit) return new X8664_MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); - case X86Subtarget::isELF: - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - return new X8664_ELFTargetObjectFile(TM); + } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){ + if (is64Bit) return new X8664_ELFTargetObjectFile(TM); return new X8632_ELFTargetObjectFile(TM); - case X86Subtarget::isMingw: - case X86Subtarget::isCygwin: - case X86Subtarget::isWindows: + } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) { return new TargetLoweringObjectFileCOFF(); - } + } + llvm_unreachable("unknown subtarget type"); } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -347,6 +345,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (!Subtarget->hasSSE2()) setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + // On X86 and X86-64, atomic operations are lowered to locked instructions. + // Locked instructions, in turn, have implicit fence semantics (all memory + // operations are flushed before issuing the locked instruction, and they + // are not buffered), so we can fold away the common pattern of + // fence-atomic-fence. 
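The comment above is the whole justification for the setShouldFoldAtomicFences(true) call that follows: a LOCK-prefixed read-modify-write is already a full barrier on x86, so explicit fences bracketing it add nothing. In source terms, both fences below are candidates for exactly this folding, and the function may lower to a single lock xadd (a sketch; actual codegen depends on compiler and version):

    #include <atomic>
    #include <cstdio>

    std::atomic<int> Counter{0};

    int fencedIncrement() {
      std::atomic_thread_fence(std::memory_order_seq_cst); // foldable on x86
      int Old = Counter.fetch_add(1, std::memory_order_seq_cst); // lock xadd
      std::atomic_thread_fence(std::memory_order_seq_cst); // foldable on x86
      return Old;
    }

    int main() { std::printf("%d\n", fencedIncrement()); } // prints 0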
+ setShouldFoldAtomicFences(true); // Expand certain atomics setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom); @@ -611,7 +615,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); setOperationAction(ISD::ADD, MVT::v8i8, Legal); @@ -657,14 +661,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v2i32, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2f32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v1i64, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); @@ -672,7 +673,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); @@ -691,7 +691,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); } } @@ -792,9 +791,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) EVT VT = SVT; // Do not attempt to promote non-128-bit vectors - if (!VT.is128BitVector()) { + if (!VT.is128BitVector()) continue; - } setOperationAction(ISD::AND, SVT, Promote); AddPromotedToType (ISD::AND, SVT, MVT::v2i64); @@ -825,6 +823,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } if (Subtarget->hasSSE41()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -965,15 +974,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Add/Sub/Mul with overflow operations are custom lowered. 
setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::SADDO, MVT::i64, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); + + // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't + // handle type legalization for these operations here. + // + // FIXME: We really should do custom legalization for addition and + // subtraction on x86-32 once PR3203 is fixed. We really can't do much better + // than generic legalization for 64-bit multiplication-with-overflow, though. + if (Subtarget->is64Bit()) { + setOperationAction(ISD::SADDO, MVT::i64, Custom); + setOperationAction(ISD::UADDO, MVT::i64, Custom); + setOperationAction(ISD::SSUBO, MVT::i64, Custom); + setOperationAction(ISD::USUBO, MVT::i64, Custom); + setOperationAction(ISD::SMULO, MVT::i64, Custom); + } if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. @@ -992,7 +1010,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::MEMBARRIER); setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -1172,6 +1189,27 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; } +bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, + unsigned &Offset) const { + if (!Subtarget->isTargetLinux()) + return false; + + if (Subtarget->is64Bit()) { + // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: + Offset = 0x28; + if (getTargetMachine().getCodeModel() == CodeModel::Kernel) + AddressSpace = 256; + else + AddressSpace = 257; + } else { + // %gs:0x14 on i386 + Offset = 0x14; + AddressSpace = 256; + } + return true; +} + + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1180,19 +1218,19 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) const { + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), - RVLocs, *DAG.getContext()); - return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_X86); } SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -1220,7 +1258,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; 
assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue ValToCopy = Outs[i].Val; + SDValue ValToCopy = OutVals[i]; // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -1308,17 +1346,34 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, report_fatal_error("SSE register return with SSE disabled"); } + SDValue Val; + // If this is a call to a function that returns an fp value on the floating - // point stack, but where we prefer to use the value in xmm registers, copy - // it out as F80 and use a truncate to move it from fp stack reg to xmm reg. - if ((VA.getLocReg() == X86::ST0 || - VA.getLocReg() == X86::ST1) && - isScalarFPTypeInSSEReg(VA.getValVT())) { - CopyVT = MVT::f80; - } + // point stack, we must guarantee that the value is popped from the stack, so + // a CopyFromReg is not good enough - the copy instruction may be eliminated + // if the return value is not used. We use the FpGET_ST0 instructions + // instead. + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) { + // If we prefer to use the value in xmm registers, copy it out as f80 and + // use a truncate to move it from fp stack reg to xmm reg. + if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; + bool isST0 = VA.getLocReg() == X86::ST0; + unsigned Opc = 0; + if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32; + if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; + if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80; + SDValue Ops[] = { Chain, InFlag }; + Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag, + Ops, 2), 1); + Val = Chain.getValue(0); - SDValue Val; - if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { + // Round the f80 to the right size, which also moves it to the appropriate + // xmm register. + if (CopyVT != VA.getValVT()) + Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, + // This truncation won't change the value. + DAG.getIntPtrConstant(1)); + } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64. if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), @@ -1338,15 +1393,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Val = Chain.getValue(0); } InFlag = Chain.getValue(2); - - if (CopyVT != VA.getValVT()) { - // Round the F80 the right size, which also moves to the appropriate xmm - // register. - Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, - // This truncation won't change the value. - DAG.getIntPtrConstant(1)); - } - InVals.push_back(Val); } @@ -1383,29 +1429,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { return Ins[0].Flags.isSRet(); } -/// IsCalleePop - Determines whether the callee is required to pop its -/// own arguments. Callee pop is necessary to support tail calls.
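The switch to explicit FpGET_ST0/FpGET_ST1 nodes in the hunk above guards an x87 invariant: a value returned on the FP stack occupies st(0) whether or not the caller wants it, so the pop must be unconditional; a plain CopyFromReg can be dead-code-eliminated, letting the 8-slot stack silt up across calls. A toy model of why an elided pop is not harmless:

    #include <cstdio>

    // The x87 register stack has 8 slots. A callee returning in st(0)
    // pushes one value; if the caller's copy-out (the only pop) is
    // removed as dead code, repeated calls overflow the stack.
    int main() {
      int Depth = 0;
      const bool ResultUsed = false; // the caller ignores the return value
      for (int Call = 0; Call < 10; ++Call) {
        ++Depth;                     // callee left a value in st(0)
        if (ResultUsed)
          --Depth;                   // the copy-out would pop it...
        // ...but an eliminated CopyFromReg pops nothing.
      }
      std::printf("depth after 10 ignored returns: %d (hardware max 8)\n",
                  Depth);
    }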
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, - CallingConv::ID CallingConv) const { - if (IsVarArg) - return false; - - switch (CallingConv) { - default: - return false; - case CallingConv::X86_StdCall: - return !Subtarget->is64Bit(); - case CallingConv::X86_FastCall: - return !Subtarget->is64Bit(); - case CallingConv::X86_ThisCall: - return !Subtarget->is64Bit(); - case CallingConv::Fast: - return GuaranteedTailCallOpt; - case CallingConv::GHC: - return GuaranteedTailCallOpt; - } -} - /// CCAssignFnForNode - Selects the correct CCAssignFn for the /// given CallingConvention value. CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { @@ -1483,11 +1506,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); return DAG.getFrameIndex(FI, getPointerTy()); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, @@ -1615,8 +1638,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { if (Is64Bit || (CallConv != CallingConv::X86_FastCall && CallConv != CallingConv::X86_ThisCall)) { - FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, - true, false)); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true)); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1722,7 +1744,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } // Some CCs need callee pop. - if (IsCalleePop(isVarArg, CallConv)) { + if (Subtarget->IsCalleePop(isVarArg, CallConv)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. @@ -1788,7 +1810,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -1802,6 +1824,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -1814,7 +1837,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, Ins, DAG); + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes.
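For readers new to the term: a sibcall is a tail call that requires no ABI adjustment at all, the stack having the same shape in caller and callee, so the call can lower to a bare jmp that reuses the caller's frame. A source-level shape that typically qualifies (whether it actually becomes a jump is up to the optimizer; this is only a sketch):

    #include <cstdio>

    static int callee(int X) { return X * 2; }

    // Same return type, no sret/byval rewriting, arguments fit the same
    // slots: an eligible sibling call, lowerable to "jmp callee".
    static int caller(int X) { return callee(X + 1); }

    int main() { std::printf("%d\n", caller(20)); } // prints 42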
@@ -1874,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; bool isByVal = Flags.isByVal(); @@ -2013,12 +2036,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) continue; assert(VA.isMemLoc()); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false); + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) { @@ -2059,7 +2082,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - bool WasGlobalOrExternal = false; if (getTargetMachine().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2067,7 +2089,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // pc-relative offset may not be large enough to hold the whole // address. } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - WasGlobalOrExternal = true; // If the callee is a GlobalAddress node (quite common, every direct call // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack // it. @@ -2095,11 +2116,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, OpFlags = X86II::MO_DARWIN_STUB; } - Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(), + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -2153,17 +2173,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(InFlag); if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. - if (MF.getRegInfo().liveout_empty()) { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, - *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_X86); - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + // We used to do: + //// If this is the first return lowered for this function, add the regs + //// to the liveout set for the function. + // This isn't right, although it's probably harmless on x86; liveouts + // should be computed from returns not tail calls. Consider a void + // function making a tail call to a function returning int. return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); } @@ -2173,7 +2188,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPush; - if (IsCalleePop(isVarArg, CallConv)) + if (Subtarget->IsCalleePop(isVarArg, CallConv)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) // If this is a call to a struct-return function, the callee @@ -2314,6 +2329,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { if (!IsTailCallConvention(CalleeCC) && @@ -2332,8 +2348,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; } - // Look for obvious safe cases to perform tail call optimization that does not - // requite ABI changes. This is what gcc calls sibcall. + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. This is what gcc calls sibcall. // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to // emit a special epilogue. @@ -2427,8 +2443,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; @@ -2439,26 +2454,32 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } } + + // If the tailcall address may be in a register, then make sure it's + // possible to register allocate for it. In 32-bit, the call address can + // only target EAX, EDX, or ECX since the tail call must be scheduled after + // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, + // RDI, R8, R9, R11. + if (!isa<GlobalAddressSDNode>(Callee) && + !isa<ExternalSymbolSDNode>(Callee)) { + unsigned Limit = Subtarget->is64Bit() ? 8 : 3; + unsigned NumInRegs = 0; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) { + if (++NumInRegs == Limit) + return false; + } + } + } } return true; } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, - DenseMap<const AllocaInst *, int> &am, - std::vector<std::pair<MachineInstr*, unsigned> > &pn -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> &cil -#endif - ) const { - return X86::createFastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return X86::createFastISel(funcInfo); } @@ -2476,7 +2497,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - false, false); + false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3175,7 +3196,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { /// constant +0.0. 
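The isZeroNode hunk just below switches the integer test to isNullValue() but still insists on isPosZero() for the FP side: -0.0 compares equal to +0.0 yet has a different bit pattern, so folds that substitute an all-zero register must reject it. A quick demonstration of the distinction:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // +0.0 and -0.0 compare equal, but only +0.0 is the all-zero bit
    // pattern that a zeroed vector register can stand in for.
    int main() {
      double PZ = +0.0, NZ = -0.0;
      std::uint64_t A, B;
      std::memcpy(&A, &PZ, sizeof A);
      std::memcpy(&B, &NZ, sizeof B);
      std::printf("equal=%d +0.0=%016llx -0.0=%016llx\n", PZ == NZ,
                  (unsigned long long)A, (unsigned long long)B);
    }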
bool X86::isZeroNode(SDValue Elt) { return ((isa<ConstantSDNode>(Elt) && - cast<ConstantSDNode>(Elt)->getZExtValue() == 0) || + cast<ConstantSDNode>(Elt)->isNullValue()) || (isa<ConstantFPSDNode>(Elt) && cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); } @@ -4433,7 +4454,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide -/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be +/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> @@ -4447,7 +4468,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); - EVT MaskEltVT = MaskVT.getVectorElementType(); EVT NewVT = MaskVT; switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); @@ -5059,13 +5079,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - if (Op.getValueType() == MVT::v2f32) - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, - Op.getOperand(0)))); - - if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) + + if (Op.getValueType() == MVT::v1i64 && + Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); @@ -5230,10 +5246,10 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. 
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); Offset = 0; } else { - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } if (Subtarget->isPICStyleRIPRel() && @@ -5278,7 +5294,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); @@ -5351,7 +5367,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); @@ -5366,33 +5383,78 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - // TODO: implement the "local dynamic" model - // TODO: implement the "initial exec"model for pic executables - assert(Subtarget->isTargetELF() && - "TLS not implemented for non-ELF targets"); + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GA->getGlobal(); - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GV = GA->resolveAliasedGlobal(false); - - TLSModel::Model model = getTLSModel(GV, - getTargetMachine().getRelocationModel()); - - switch (model) { - case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented - if (Subtarget->is64Bit()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); - return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + if (Subtarget->isTargetELF()) { + // TODO: implement the "local dynamic" model + // TODO: implement the "initial exec" model for pic executables + + // If GV is an alias then use the aliasee for determining + // thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->resolveAliasedGlobal(false); + + TLSModel::Model model + = getTLSModel(GV, getTargetMachine().getRelocationModel()); + + switch (model) { + case TLSModel::GeneralDynamic: + case TLSModel::LocalDynamic: // not implemented + if (Subtarget->is64Bit()) + return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); + return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); + } + } else if (Subtarget->isTargetDarwin()) { + // Darwin only has one model of TLS. Lower to that. + unsigned char OpFlag = 0; + unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? + X86ISD::WrapperRIP : X86ISD::Wrapper; + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg.
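Context for the Darwin TLS path this hunk introduces: unlike ELF's several TLS models, Darwin has a single scheme in which the address of a thread-local variable is obtained by calling through a per-variable descriptor (the TLVP relocation), with the result coming back in the ordinary return register, which is what X86ISD::TLSCALL models below. At the source level the feature being lowered is simply (assuming a toolchain with the __thread extension):

    #include <cstdio>

    // Every access to this variable lowers to a TLS address computation;
    // on Darwin that is one indirect call through the variable's TLV
    // descriptor, result in EAX/RAX.
    static __thread int PerThreadCounter = 0;

    int main() {
      ++PerThreadCounter;
      std::printf("%d\n", PerThreadCounter); // prints 1 on this thread
    }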
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) && + !Subtarget->is64Bit(); + if (PIC32) + OpFlag = X86II::MO_TLVP_PIC_BASE; + else + OpFlag = X86II::MO_TLVP; + DebugLoc DL = Op.getDebugLoc(); + SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, + getPointerTy(), + GA->getOffset(), OpFlag); + SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + + // With PIC32, the address is actually $g + Offset. + if (PIC32) + Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc(), getPointerTy()), + Offset); + + // Lowering the machine isd will make sure everything is in the right + // location. + SDValue Args[] = { Offset }; + SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1); + + // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, - Subtarget->is64Bit()); + // And our return value (tls address) is in the standard call return value + // location. + unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); } + + assert(false && + "TLS not implemented for this target."); llvm_unreachable("Unreachable"); return SDValue(); @@ -5715,7 +5777,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(), FudgePtr, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. @@ -5964,6 +6026,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, bool NeedCF = false; bool NeedOF = false; switch (X86CC) { + default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: NeedCF = true; @@ -5973,120 +6036,129 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, case X86::COND_O: case X86::COND_NO: NeedOF = true; break; - default: break; } // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. - if (Op.getResNo() == 0 && !NeedOF && !NeedCF) { - unsigned Opcode = 0; - unsigned NumOperands = 0; - switch (Op.getNode()->getOpcode()) { - case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is - // likely to be selected as part of a load-modify-store - // instruction. When the root node in a match is a store, isel - // doesn't know how to remap non-chain non-flag uses of other - // nodes in the match, such as the ADD in this case. This leads - // to the ADD being left around and reselected, with the result - // being two adds in the output. Alas, even if none our users - // are stores, that doesn't prove we're O.K. Ergo, if we have - // any parents that aren't CopyToReg or SETCC, eschew INC/DEC. - // A better fix seems to require climbing the DAG back to the - // root, and it doesn't seem to be worth the effort. 
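The ADD arm of the EmitTest switch that follows reduces to a small constant-based decision; a freestanding sketch, using placeholder enumerators instead of LLVM's X86ISD opcode values:

#include <cstdint>

// Freestanding sketch of the ADD arm's opcode choice (enumerators are
// placeholders, not LLVM's X86ISD values): adds of +1/-1 still set
// EFLAGS when selected as INC/DEC, so no separate TEST is needed.
enum class FlagOp { Add, Inc, Dec };

static FlagOp chooseFlagSettingAdd(int64_t RHS) {
  if (RHS == 1)  return FlagOp::Inc;  // add $1  selects as INC
  if (RHS == -1) return FlagOp::Dec;  // add $-1 selects as DEC
  return FlagOp::Add;                 // regular EFLAGS-setting ADD
}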
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) - goto default_case; - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { - // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1) { - Opcode = X86ISD::INC; - NumOperands = 1; - break; - } - // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue()) { - Opcode = X86ISD::DEC; - NumOperands = 1; - break; - } + if (Op.getResNo() != 0 || NeedOF || NeedCF) + // Emit a CMP with 0, which is the TEST pattern. + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + unsigned Opcode = 0; + unsigned NumOperands = 0; + switch (Op.getNode()->getOpcode()) { + case ISD::ADD: + // Due to an isel shortcoming, be conservative if this add is likely to be + // selected as part of a load-modify-store instruction. When the root node + // in a match is a store, isel doesn't know how to remap non-chain non-flag + // uses of other nodes in the match, such as the ADD in this case. This + // leads to the ADD being left around and reselected, with the result being + // two adds in the output. Alas, even if none our users are stores, that + // doesn't prove we're O.K. Ergo, if we have any parents that aren't + // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require + // climbing the DAG back to the root, and it doesn't seem to be worth the + // effort. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) + goto default_case; + + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { + // An add of one will be selected as an INC. + if (C->getAPIntValue() == 1) { + Opcode = X86ISD::INC; + NumOperands = 1; + break; } - // Otherwise use a regular EFLAGS-setting add. - Opcode = X86ISD::ADD; - NumOperands = 2; - break; - case ISD::AND: { - // If the primary and result isn't used, don't bother using X86ISD::AND, - // because a TEST instruction will be better. - bool NonFlagUse = false; - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - unsigned UOpNo = UI.getOperandNo(); - if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { - // Look pass truncate. - UOpNo = User->use_begin().getOperandNo(); - User = *User->use_begin(); - } - if (User->getOpcode() != ISD::BRCOND && - User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { - NonFlagUse = true; - break; - } + + // An add of negative one (subtract of one) will be selected as a DEC. + if (C->getAPIntValue().isAllOnesValue()) { + Opcode = X86ISD::DEC; + NumOperands = 1; + break; } - if (!NonFlagUse) + } + + // Otherwise use a regular EFLAGS-setting add. + Opcode = X86ISD::ADD; + NumOperands = 2; + break; + case ISD::AND: { + // If the primary and result isn't used, don't bother using X86ISD::AND, + // because a TEST instruction will be better. + bool NonFlagUse = false; + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look pass truncate. 
+ UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + + if (User->getOpcode() != ISD::BRCOND && + User->getOpcode() != ISD::SETCC && + (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + NonFlagUse = true; break; + } } + + if (!NonFlagUse) + break; + } // FALL THROUGH - case ISD::SUB: - case ISD::OR: - case ISD::XOR: - // Due to the ISEL shortcoming noted above, be conservative if this op is - // likely to be selected as part of a load-modify-store instruction. - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + case ISD::SUB: + case ISD::OR: + case ISD::XOR: + // Due to the ISEL shortcoming noted above, be conservative if this op is + // likely to be selected as part of a load-modify-store instruction. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) - goto default_case; - // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { - case ISD::SUB: Opcode = X86ISD::SUB; break; - case ISD::OR: Opcode = X86ISD::OR; break; - case ISD::XOR: Opcode = X86ISD::XOR; break; - case ISD::AND: Opcode = X86ISD::AND; break; - default: llvm_unreachable("unexpected operator!"); - } - NumOperands = 2; - break; - case X86ISD::ADD: - case X86ISD::SUB: - case X86ISD::INC: - case X86ISD::DEC: - case X86ISD::OR: - case X86ISD::XOR: - case X86ISD::AND: - return SDValue(Op.getNode(), 1); - default: - default_case: - break; - } - if (Opcode != 0) { - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0; i != NumOperands; ++i) - Ops.push_back(Op.getOperand(i)); - SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); - DAG.ReplaceAllUsesWith(Op, New); - return SDValue(New.getNode(), 1); + if (UI->getOpcode() == ISD::STORE) + goto default_case; + + // Otherwise use a regular EFLAGS-setting instruction. + switch (Op.getNode()->getOpcode()) { + default: llvm_unreachable("unexpected operator!"); + case ISD::SUB: Opcode = X86ISD::SUB; break; + case ISD::OR: Opcode = X86ISD::OR; break; + case ISD::XOR: Opcode = X86ISD::XOR; break; + case ISD::AND: Opcode = X86ISD::AND; break; } + + NumOperands = 2; + break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::INC: + case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + return SDValue(Op.getNode(), 1); + default: + default_case: + break; } - // Otherwise just emit a CMP with 0, which is the TEST pattern. - return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, - DAG.getConstant(0, Op.getValueType())); + if (Opcode == 0) + // Emit a CMP with 0, which is the TEST pattern. 
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0; i != NumOperands; ++i) + Ops.push_back(Op.getOperand(i)); + + SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); + DAG.ReplaceAllUsesWith(Op, New); + return SDValue(New.getNode(), 1); } /// Emit nodes that will be selected as "cmp Op0,Op1", or something @@ -6113,15 +6185,21 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, Op1 = Op1.getOperand(0); SDValue LHS, RHS; - if (Op1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0))) - if (And10C->getZExtValue() == 1) { - LHS = Op0; - RHS = Op1.getOperand(1); - } - } else if (Op0.getOpcode() == ISD::SHL) { + if (Op1.getOpcode() == ISD::SHL) + std::swap(Op0, Op1); + if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0))) if (And00C->getZExtValue() == 1) { + // If we looked past a truncate, check that it's only truncating away + // known zeros. + unsigned BitWidth = Op0.getValueSizeInBits(); + unsigned AndBitWidth = And.getValueSizeInBits(); + if (BitWidth > AndBitWidth) { + APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones; + DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones); + if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) + return SDValue(); + } LHS = Op1; RHS = Op0.getOperand(1); } @@ -6172,7 +6250,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op1)->getZExtValue() == 0 && + cast<ConstantSDNode>(Op1)->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); if (NewSetCC.getNode()) @@ -6552,15 +6630,16 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, MVT::i8); - SDValue User = SDValue(*Op.getNode()->use_begin(), 0); + SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. - if (User.getOpcode() == ISD::BR) { - SDValue FalseBB = User.getOperand(1); - SDValue NewBR = - DAG.UpdateNodeOperands(User, User.getOperand(0), Dest); + if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); + (void)NewBR; Dest = FalseBB; Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), @@ -6632,7 +6711,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; - EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); @@ -6685,7 +6763,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), MVT::i32), - FIN, SV, 0, false, false, 0); + FIN, SV, 4, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area @@ -6693,7 +6771,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8, false, false, 0); MemOps.push_back(Store); @@ -6702,7 +6780,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16, false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -6712,9 +6790,6 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); - SDValue Chain = Op.getOperand(0); - SDValue SrcPtr = Op.getOperand(1); - SDValue SrcSV = Op.getOperand(2); report_fatal_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); @@ -7733,6 +7808,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; @@ -7944,8 +8020,11 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. 
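Back on the LowerVASTART hunk earlier in this file: the corrected SV offsets in its stores (4, 8, 16) are simply the field offsets of the SysV x86-64 va_list record, with gp_offset itself at offset 0. As a plain struct (assuming an LP64 target):

#include <cstdint>

// The SysV x86-64 va_list record that LowerVASTART materializes; the
// SV offsets in the corrected stores (4, 8, 16) match these fields.
struct VAListX8664 {
  uint32_t gp_offset;         // 0:  next GPR slot in reg_save_area
  uint32_t fp_offset;         // 4:  next XMM slot in reg_save_area
  void    *overflow_arg_area; // 8:  arguments passed on the stack
  void    *reg_save_area;     // 16: spilled argument registers
};

static_assert(sizeof(VAListX8664) == 24, "LP64 layout assumed by the stores");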
+ nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -7955,17 +8034,17 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, newMBB->addSuccessor(newMBB); // Insert instructions into newMBB based on incoming instruction - assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); DebugLoc dl = bInstr->getDebugLoc(); MachineOperand& destOper = bInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = bInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &bInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(RC); @@ -8008,7 +8087,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8053,8 +8132,11 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8066,12 +8148,12 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, DebugLoc dl = bInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction // There are 8 "real" operands plus 9 implicit def/uses, ignored here. - assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 && "unexpected number of operands"); MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); - MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) { + MachineOperand* argOpers[2 + X86::AddrNumOperands]; + for (int i=0; i < 2 + X86::AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); // We use some of the operands multiple times, so conservatively just @@ -8081,7 +8163,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, } // x86 address has 5 operands: base, index, scale, displacement, and segment. 
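The comment this hunk ends on explains the X86::AddrNumOperands renaming throughout: a memory reference now carries five machine operands. A sketch of the layout, in the order the builders append them (the struct and helper here are invented for illustration):

// The five machine operands of an x86 memory reference, in the order
// the code above indexes them (lastAddrIndx = AddrNumOperands - 1).
struct X86MemOperands {
  unsigned  BaseReg;  // base register, 0 if none
  long long Scale;    // 1, 2, 4, or 8
  unsigned  IndexReg; // index register, 0 if none
  long long Disp;     // signed displacement
  unsigned  SegReg;   // segment override, 0 for the default segment
};

// addDirectMem(MIB, Reg), updated later in this patch, appends the
// equivalent of { Reg, 1, 0, 0, 0 }, i.e. a bare [Reg] access.
static X86MemOperands directMem(unsigned Reg) {
  return {Reg, 1, 0, 0, 0};
}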
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] unsigned t1 = F->getRegInfo().createVirtualRegister(RC); MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1); @@ -8171,7 +8253,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8205,8 +8287,11 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors of thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(mInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8217,16 +8302,16 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, DebugLoc dl = mInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction - assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); MachineOperand& destOper = mInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = mInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &mInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); @@ -8274,7 +8359,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. + mInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8284,7 +8369,6 @@ MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - MachineFunction *F = BB->getParent(); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -8306,7 +8390,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) .addReg(X86::XMM0); - F->DeleteMachineInstr(MI); + MI->eraseFromParent(); return BB; } @@ -8335,9 +8419,12 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( F->insert(MBBIter, XMMSaveMBB); F->insert(MBBIter, EndMBB); - // Set up the CFG. - // Move any original successors of MBB to the end block. - EndMBB->transferSuccessors(MBB); + // Transfer the remainder of MBB and its successor edges to EndMBB. + EndMBB->splice(EndMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndMBB->transferSuccessorsAndUpdatePHIs(MBB); + // The original block will now fall through to the XMM save block. MBB->addSuccessor(XMMSaveMBB); // The XMMSaveMBB will fall through to the end block. 
@@ -8376,7 +8463,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( .addMemOperand(MMO); } - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return EndMBB; } @@ -8405,24 +8492,39 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned Opc = - X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); - BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // If the EFLAGS register isn't dead in the terminator, then claim that it's + // live into the sink and copy blocks. + const MachineFunction *MF = BB->getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + BitVector ReservedRegs = TRI->getReservedRegs(*MF); + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue; + unsigned Reg = MO.getReg(); + if (Reg != X86::EFLAGS) continue; + copy0MBB->addLiveIn(Reg); + sinkMBB->addLiveIn(Reg); + } + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + // Create the conditional branch instruction. + unsigned Opc = + X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); + BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -8431,11 +8533,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return sinkMBB; } @@ -8444,21 +8547,70 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - MachineFunction *F = BB->getParent(); // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. // FIXME: The code should be tweaked as soon as we'll try to do codegen for // mingw-w64. 
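A value-level model of what the EmitLoweredSelect diamond above computes; the machine-level subtlety is only that the condition lives in EFLAGS, hence the live-in bookkeeping for the copy and sink blocks:

// C-level effect of the expanded CMOV pseudo: a select on a condition
// carried in EFLAGS, realized as thisMBB -> {copy0MBB, sinkMBB} with a
// PHI merging the two incoming values.
static int loweredSelect(bool eflagsCond, int trueVal, int falseVal) {
  if (eflagsCond)      // thisMBB: Jcc straight to sinkMBB
    return trueVal;    // value arriving on thisMBB's edge
  return falseVal;     // copy0MBB: falls through with %FalseValue
}                      // sinkMBB: the PHI picks the reaching value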
- BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) .addExternalSymbol("_alloca") .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) .addReg(X86::ESP, RegState::Define | RegState::Implicit); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, + MachineBasicBlock *BB) const { + // This is pretty easy. We're taking the value that we received from + // our load from the relocation, sticking it in either RDI (x86-64) + // or EAX and doing an indirect call. The return value will then + // be in the normal return register. + const X86InstrInfo *TII + = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo()); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *F = BB->getParent(); + + assert(MI->getOperand(3).isGlobal() && "This should be a global"); + + if (Subtarget->is64Bit()) { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV64rm), X86::RDI) + .addReg(X86::RIP) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); + addDirectMem(MIB, X86::RDI); + } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(0) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + } else { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(TII->getGlobalBaseReg(F)) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + } + + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -8469,6 +8621,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, default: assert(false && "Unexpected instr type to insert"); case X86::MINGW_ALLOCA: return EmitLoweredMingwAlloca(MI, BB); + case X86::TLSCall_32: + case X86::TLSCall_64: + return EmitLoweredTLSCall(MI, BB); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8499,23 +8654,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); - addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW), + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx) .addImm(0xC7F); // Reload the modified control word now... 
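On the FP_TO_INT path above, the 0xC7F stored into the stack slot is a complete replacement x87 control word: all six exception masks set, extended precision, and rounding control forced to truncate, which FIST-based conversion requires. A small decode of the rounding-control field:

#include <cstdint>

// Decode of the control word the inserter writes before the FIST and
// undoes afterwards: bits 10-11 (RC) == 0b11 selects round-toward-zero.
static constexpr uint16_t kTruncCW = 0xC7F;

static constexpr bool roundsTowardZero(uint16_t cw) {
  return (cw & 0x0C00) == 0x0C00;  // RC field, bits 10-11
}

static_assert(roundsTowardZero(kTruncCW), "0xC7F selects truncation");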
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); // Restore the memory image of control word to original value - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx) .addReg(OldCW); // Get the X86 opcode to use. @@ -8554,13 +8711,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } else { AM.Disp = Op.getImm(); } - addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM) - .addReg(MI->getOperand(X86AddrNumOperands).getReg()); + addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM) + .addReg(MI->getOperand(X86::AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } // String/text processing lowering. @@ -9513,8 +9671,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { - if (SumC->getSExtValue() == Bits && - ShAmt1.getOperand(1) == ShAmt0) + SDValue ShAmt1Op1 = ShAmt1.getOperand(1); + if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE) + ShAmt1Op1 = ShAmt1Op1.getOperand(0); + if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, DAG.getNode(ISD::TRUNCATE, DL, @@ -9710,58 +9870,6 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// On X86 and X86-64, atomic operations are lowered to locked instructions. -// Locked instructions, in turn, have implicit fence semantics (all memory -// operations are flushed before issuing the locked instruction, and the -// are not buffered), so we can fold away the common pattern of -// fence-atomic-fence. 
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { - SDValue atomic = N->getOperand(0); - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - break; - default: - return SDValue(); - } - - SDValue fence = atomic.getOperand(0); - if (fence.getOpcode() != ISD::MEMBARRIER) - return SDValue(); - - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2), - atomic.getOperand(3)); - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2)); - default: - return SDValue(); - } -} - static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) @@ -9809,7 +9917,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); - case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); } @@ -9932,8 +10039,8 @@ static bool LowerToBSwap(CallInst *CI) { // so don't worry about this. // Verify this is a simple bswap. - if (CI->getNumOperands() != 2 || - CI->getType() != CI->getOperand(1)->getType() || + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || !CI->getType()->isIntegerTy()) return false; @@ -9946,7 +10053,7 @@ static bool LowerToBSwap(CallInst *CI) { Module *M = CI->getParent()->getParent()->getParent(); Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Op = CallInst::Create(Int, Op, CI->getName(), CI); CI->replaceAllUsesWith(Op); @@ -10079,7 +10186,6 @@ LowerXConstraint(EVT ConstraintVT) const { /// vector. If it is invalid, don't add anything to Ops. void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Constraint, - bool hasMemory, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); @@ -10121,9 +10227,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'e': { // 32-bit signed value if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { - const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), - C->getSExtValue())) { + if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. 
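The rewritten 'e' check above, and its unsigned sibling 'Z' just below, are 32-bit round-trip tests on a 64-bit immediate; standalone equivalents of what isValueValidForType verifies here:

#include <cstdint>

// Standalone versions of the validity tests behind the 'e' and 'Z'
// inline-asm constraints on 64-bit immediates.
static bool fitsSExt32(int64_t v) {   // 'e': valid as sign-extended i32
  return v == static_cast<int32_t>(v);
}

static bool fitsZExt32(uint64_t v) {  // 'Z': valid as zero-extended u32
  return v == static_cast<uint32_t>(v);
}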
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64); break; @@ -10136,9 +10241,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'Z': { // 32-bit unsigned value if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { - const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), - C->getZExtValue())) { + if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getZExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); break; } @@ -10155,6 +10259,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, break; } + // In any sort of PIC mode addresses need to be computed at runtime by + // adding in a register or some sort of table lookup. These can't + // be used as immediates. + if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC()) + return; + // If we are in non-pic codegen mode, we allow the address of a global (with // an optional displacement) to be used with 'i'. GlobalAddressSDNode *GA = 0; @@ -10190,11 +10300,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, getTargetMachine()))) return; - if (hasMemory) - Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG); - else - Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset); - Result = Op; + Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + GA->getValueType(0), Offset); break; } } @@ -10203,8 +10310,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Result); return; } - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, - Ops, DAG); + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } std::vector<unsigned> X86TargetLowering:: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1ef1a7b..2d28e5c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -196,6 +196,10 @@ namespace llvm { // TLSADDR - Thread Local Storage. TLSADDR, + + // TLSCALL - Thread Local Storage. When calling to an OS provided + // thunk at the address from an earlier relocation. + TLSCALL, // SegmentBaseAddress - The address segment:0 SegmentBaseAddress, @@ -496,7 +500,6 @@ namespace llvm { /// being processed is 'm'. virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -576,20 +579,17 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel * - createFastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &, - DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> &, - std::vector<std::pair<MachineInstr*, unsigned> > & -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> & -#endif - ) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + /// getStackCookieLocation - Return true if the target stores stack + /// protector cookies at a fixed offset in some non-standard address + /// space, and populates the address space and offset as + /// appropriate. 
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -643,6 +643,7 @@ namespace llvm { bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; @@ -725,6 +726,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -733,13 +735,13 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) const; + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp) const; @@ -794,6 +796,9 @@ namespace llvm { MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, + MachineBasicBlock *BB) const; /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. @@ -806,15 +811,7 @@ namespace llvm { }; namespace X86 { - FastISel *createFastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &, - DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> &, - std::vector<std::pair<MachineInstr*, unsigned> > & -#ifndef NDEBUG - , SmallSet<const Instruction*, 8> & -#endif - ); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo); } } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 97eb17c..42d0e7f 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -35,6 +35,14 @@ def i64i8imm : Operand<i64> { let ParserMatchClass = ImmSExti64i8AsmOperand; } +def lea64_32mem : Operand<i32> { + let PrintMethod = "printi32mem"; + let AsmOperandLowerMethod = "lower_lea64_32mem"; + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + + // Special i64mem for addresses of load folding tail calls. These are not // allowed to use callee-saved registers since they must be scheduled // after callee-saved register are popped. 
@@ -44,29 +52,16 @@ def i64mem_TC : Operand<i64> { let ParserMatchClass = X86MemAsmOperand; } -def lea64mem : Operand<i64> { - let PrintMethod = "printlea64mem"; - let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} - -def lea64_32mem : Operand<i32> { - let PrintMethod = "printlea64_32mem"; - let AsmOperandLowerMethod = "lower_lea64_32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} - //===----------------------------------------------------------------------===// // Complex Pattern Definitions. // -def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr", +def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr", [add, sub, mul, X86mul_imm, shl, or, frameindex, X86WrapperRIP], []>; -def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", +def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; - + //===----------------------------------------------------------------------===// // Pattern fragments. // @@ -289,11 +284,11 @@ def LEA64_32r : I<0x8D, MRMSrcMem, [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)]>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))]>, TB; @@ -521,7 +516,7 @@ let Defs = [EFLAGS] in { def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), "add{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in { let isCommutable = 1 in // Register-Register Addition @@ -559,7 +554,7 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86add_flag GR64:$src1, (load addr:$src2)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Memory-Register Addition def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -580,7 +575,7 @@ let Uses = [EFLAGS] in { def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), "adc{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -606,7 +601,7 @@ def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", @@ -621,7 +616,7 @@ def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), addr:$dst)]>; } // Uses = [EFLAGS] -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // Register-Register Subtraction def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -653,7 +648,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), "sub{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), 
"sub{q}\t{$src, %rax|%rax, $src}", []>; @@ -677,7 +672,7 @@ def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), (implicit EFLAGS)]>; let Uses = [EFLAGS] in { -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", @@ -702,7 +697,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), "sbb{q}\t{$src, %rax|%rax, $src}", []>; @@ -736,7 +731,7 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), } let Defs = [EFLAGS] in { -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in // Register-Register Signed Integer Multiplication def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), @@ -751,7 +746,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Suprisingly enough, these are not two address instructions! @@ -803,7 +798,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // Unary instructions let Defs = [EFLAGS], CodeSize = 2 in { -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src)), (implicit EFLAGS)]>; @@ -811,14 +806,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)]>; -let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>; def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit EFLAGS)]>; -let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>; def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", @@ -826,7 +821,7 @@ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", (implicit EFLAGS)]>; // In 64-bit mode, single byte INC and DEC cannot be encoded. -let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in { +let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in { // Can transform into LEA. def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", @@ -844,38 +839,36 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, Requires<[In64BitMode]>; -} // isConvertibleToThreeAddress +} // Constraints = "$src = $dst", isConvertibleToThreeAddress // These are duplicates of their 32-bit counterparts. Only needed so X86 knows // how to unfold them. 
-let isTwoAddress = 0, CodeSize = 2 in { - def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; - def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; - def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; - def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; -} +def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; } // Defs = [EFLAGS], CodeSize let Defs = [EFLAGS] in { // Shift instructions -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src), +def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (shl GR64:$src, CL))]>; + [(set GR64:$dst, (shl GR64:$src1, CL))]>; let isConvertibleToThreeAddress = 1 in // Can transform into LEA. def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), @@ -885,7 +878,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), // 'add reg,reg' is cheaper. 
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", []>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), @@ -898,18 +891,18 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t$dst", [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src), +def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (srl GR64:$src, CL))]>; + [(set GR64:$dst, (srl GR64:$src1, CL))]>; def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shr{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst", [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), @@ -922,11 +915,11 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t$dst", [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src), +def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (sra GR64:$src, CL))]>; + [(set GR64:$dst, (sra GR64:$src1, CL))]>; def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "sar{q}\t{$src2, $dst|$dst, $src2}", @@ -934,7 +927,7 @@ def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst", [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src = $dst" let Uses = [CL] in def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), @@ -949,7 +942,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), // Rotate instructions -let isTwoAddress = 1 in { +let Constraints = "$src = $dst" in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), @@ -966,9 +959,8 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -} +} // Constraints = "$src = $dst" -let isTwoAddress = 0 in { def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), "rcl{q}\t{1, $dst|$dst, 1}", []>; def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), @@ -984,13 +976,12 @@ def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -} -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src), +def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotl GR64:$src, CL))]>; + [(set GR64:$dst, (rotl GR64:$src1, CL))]>; def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "rol{q}\t{$src2, $dst|$dst, $src2}", @@ -998,7 +989,7 @@ def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), def 
ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), @@ -1011,11 +1002,11 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t$dst", [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src), +def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotr GR64:$src, CL))]>; + [(set GR64:$dst, (rotr GR64:$src1, CL))]>; def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", @@ -1023,7 +1014,7 @@ def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), @@ -1037,7 +1028,7 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; // Double shift instructions (generalizations of rotate) -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1067,7 +1058,7 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (i8 imm:$src3)))]>, TB; } // isCommutable -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in { def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -1097,7 +1088,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, // Logical Instructions... 
// -let isTwoAddress = 1 , AddedComplexity = 15 in +let Constraints = "$src = $dst" , AddedComplexity = 15 in def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src))]>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", @@ -1107,7 +1098,7 @@ let Defs = [EFLAGS] in { def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), "and{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def AND64rr : RI<0x21, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1134,7 +1125,7 @@ def AND64ri32 : RIi32<0x81, MRM4r, "and{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86and_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def AND64mr : RI<0x21, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -1152,7 +1143,7 @@ def AND64mi32 : RIi32<0x81, MRM4m, [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1179,7 +1170,7 @@ def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86or_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "or{q}\t{$src, $dst|$dst, $src}", @@ -1197,7 +1188,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), "or{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1224,7 +1215,7 @@ def XOR64ri32 : RIi32<0x81, MRM6r, "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xor{q}\t{$src, $dst|$dst, $src}", @@ -1366,7 +1357,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), } // Defs = [EFLAGS] // Conditional moves -let Uses = [EFLAGS], isTwoAddress = 1 in { +let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { let isCommutable = 1 in { def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1530,7 +1521,7 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] "cmovno{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_NO, EFLAGS))]>, TB; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Use sbb to materialize carry flag into a GPR. // FIXME: This are pseudo ops that should be replaced with Pat<> patterns. 
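The FIXME above refers to the classic sbb idiom these pseudos wrap; its value-level effect, sketched:

#include <cstdint>

// Effect of "sbb %r, %r" used to materialize the carry flag: the
// register becomes 0 when CF is clear and -1 (all ones) when CF is
// set, since r - r - CF == -CF.
static int64_t materializeCarry(bool cf) {
  return cf ? -1 : 0;
}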
@@ -1588,7 +1579,7 @@ def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", @@ -1601,7 +1592,7 @@ def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, [(set VR128:$dst, (int_x86_sse2_cvtsi642sd VR128:$src1, (loadi64 addr:$src2)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Signed i64 -> f32 def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src), @@ -1611,7 +1602,7 @@ def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src), "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", @@ -1625,7 +1616,7 @@ let isTwoAddress = 1 in { [(set VR128:$dst, (int_x86_sse_cvtsi642ss VR128:$src1, (loadi64 addr:$src2)))]>; -} +} // Constraints = "$src1 = $dst" // f32 -> signed i64 def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), @@ -1691,6 +1682,7 @@ def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), // Thread Local Storage Instructions //===----------------------------------------------------------------------===// +// ELF TLS Support // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. @@ -1700,7 +1692,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [RSP] in -def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), +def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), ".byte\t0x66; " "leaq\t$sym(%rip), %rdi; " ".word\t0x6666; " @@ -1709,6 +1701,17 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; +// Darwin TLS Support +// For x86_64, the address of the thunk is passed in %rdi, on return +// the address of the variable is in %rax. All other registers are preserved. +let Defs = [RAX], + Uses = [RDI], + usesCustomInserter = 1 in +def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLSCall_64", + [(X86TLSCall addr:$sym)]>, + Requires<[In64BitMode]>; + let AddedComplexity = 5, isCodeGenOnly = 1 in def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movq\t%gs:$src, $dst", @@ -1964,6 +1967,17 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), (TCRETURNdi64 texternalsym:$dst, imm:$off)>, Requires<[In64BitMode]>; +// tls has some funny stuff here... +// This corresponds to movabs $foo@tpoff, %rax +def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), + (MOV64ri tglobaltlsaddr :$dst)>; +// This corresponds to add $foo@tpoff, %rax +def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), + (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; +// This corresponds to mov foo@tpoff(%rbx), %eax +def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), + (MOV64rm tglobaltlsaddr :$dst)>; + // Comparisons. 
// TEST R,R is smaller than CMP R,0 @@ -2332,45 +2346,3 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; -//===----------------------------------------------------------------------===// -// X86-64 SSE4.1 Instructions -//===----------------------------------------------------------------------===// - -/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination -multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst), - (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set GR64:$dst, - (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W; - def mr : SS4AIi8<opc, MRMDestMem, (outs), - (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(store (extractelt (v2i64 VR128:$src1), imm:$src2), - addr:$dst)]>, OpSize, REX_W; -} - -defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; - -let isTwoAddress = 1 in { - multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, - OpSize, REX_W; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), - imm:$src3)))]>, OpSize, REX_W; - } -} - -defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 5a82a7b..2a6a71d 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -64,19 +64,15 @@ struct X86AddressMode { /// static inline const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { - // Because memory references are always represented with four - // values, this adds: Reg, [1, NoReg, 0] to the instruction. - return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0); + // Because memory references are always represented with five + // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction. + return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0); } -static inline const MachineInstrBuilder & -addLeaOffset(const MachineInstrBuilder &MIB, int Offset) { - return MIB.addImm(1).addReg(0).addImm(Offset); -} static inline const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset) { - return addLeaOffset(MIB, Offset).addReg(0); + return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0); } /// addRegOffset - This function is used to add a memory reference of the form @@ -89,25 +85,20 @@ addRegOffset(const MachineInstrBuilder &MIB, return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); } -static inline const MachineInstrBuilder & -addLeaRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, int Offset) { - return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); -} - /// addRegReg - This function is used to add a memory reference of the form: /// [Reg + Reg]. 
static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) { return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1) - .addReg(Reg2, getKillRegState(isKill2)).addImm(0); + .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0); } static inline const MachineInstrBuilder & -addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { - assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); - +addFullAddress(const MachineInstrBuilder &MIB, + const X86AddressMode &AM) { + assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); + if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); else if (AM.BaseType == X86AddressMode::FrameIndexBase) @@ -116,15 +107,11 @@ addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert (0); MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) - return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); + MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else - return MIB.addImm(AM.Disp); -} - -static inline const MachineInstrBuilder & -addFullAddress(const MachineInstrBuilder &MIB, - const X86AddressMode &AM) { - return addLeaAddress(MIB, AM).addReg(0); + MIB.addImm(AM.Disp); + + return MIB.addReg(0); } /// addFrameReference - This function is used to add a reference to the base of diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 0aae4a8..da93de9 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -371,7 +371,7 @@ multiclass FPCMov<PatLeaf cc> { Requires<[HasCMov]>; } -let Uses = [EFLAGS], isTwoAddress = 1 in { +let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { defm CMOVB : FPCMov<X86_COND_B>; defm CMOVBE : FPCMov<X86_COND_BE>; defm CMOVE : FPCMov<X86_COND_E>; @@ -380,7 +380,7 @@ defm CMOVNB : FPCMov<X86_COND_AE>; defm CMOVNBE: FPCMov<X86_COND_A>; defm CMOVNE : FPCMov<X86_COND_NE>; defm CMOVNP : FPCMov<X86_COND_NP>; -} +} // Uses = [EFLAGS], Constraints = "$src1 = $dst" let Predicates = [HasCMov] in { // These are not factored because there's no clean way to pass DA/DB. @@ -680,19 +680,19 @@ def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>; // FP extensions map onto simple pseudo-value conversions if they are to/from // the FP stack. -def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, +def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, +def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, +def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>, Requires<[FPStackf64]>; // FP truncations map onto simple pseudo-value conversions if they are to/from // the FP stack. We have validated that only value-preserving truncations make // it through isel. 
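The comment above says only value-preserving truncations survive to instruction selection, which is why the fround patterns that follow can lower to plain COPY_TO_REGCLASS moves: on the x87 stack, RFP32/RFP64/RFP80 values all occupy 80-bit stack slots, so no rounding happens at copy time. A small C++ check of what "value-preserving" means for an f64-to-f32 round trip (the helper name and sample values are mine, used only to illustrate the invariant):

#include <cassert>

// A double is value-preserving under truncation to float iff the
// float-and-back round trip reproduces it exactly.
static bool fitsInF32(double X) {
  return static_cast<double>(static_cast<float>(X)) == X;
}

int main() {
  assert(fitsInF32(1.5));   // exactly representable in f32
  assert(!fitsInF32(0.1));  // 0.1 needs more precision than f32 offers
  return 0;
}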
-def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>, +def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>, +def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>, +def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, Requires<[FPStackf64]>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index c4522f3..97578af 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -50,9 +50,10 @@ def NoImm : ImmType<0>; def Imm8 : ImmType<1>; def Imm8PCRel : ImmType<2>; def Imm16 : ImmType<3>; -def Imm32 : ImmType<4>; -def Imm32PCRel : ImmType<5>; -def Imm64 : ImmType<6>; +def Imm16PCRel : ImmType<4>; +def Imm32 : ImmType<5>; +def Imm32PCRel : ImmType<6>; +def Imm64 : ImmType<7>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -101,6 +102,10 @@ class XS { bits<4> Prefix = 12; } class T8 { bits<4> Prefix = 13; } class TA { bits<4> Prefix = 14; } class TF { bits<4> Prefix = 15; } +class VEX { bit hasVEXPrefix = 1; } +class VEX_W { bit hasVEX_WPrefix = 1; } +class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } +class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> @@ -128,6 +133,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? bits<2> SegOvrBits = 0; // Segment override prefix. Domain ExeDomain = d; + bit hasVEXPrefix = 0; // Does this inst require a VEX prefix? + bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field? + bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field? + bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register + // to be encoded in an immediate field? // TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits; @@ -141,6 +151,10 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{21-20} = SegOvrBits; let TSFlags{23-22} = ExeDomain.Value; let TSFlags{31-24} = Opcode; + let TSFlags{32} = hasVEXPrefix; + let TSFlags{33} = hasVEX_WPrefix; + let TSFlags{34} = hasVEX_4VPrefix; + let TSFlags{35} = hasVEX_i8ImmReg; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, @@ -174,6 +188,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm16PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { @@ -211,11 +232,56 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +// SI - SSE 1 & 2 scalar instructions +class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> + : I<o, F, outs, ins, asm, pattern> { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// SIi8 - SSE 1 & 2 scalar instructions +class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern> { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// PI - SSE 1 & 2 packed instructions +class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, + Domain d> + : I<o, F, outs, ins, asm, pattern, d> { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// PIi8 - SSE 1 & 2 packed instructions with immediate +class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, Domain d> + : Ii8<o, F, outs, ins, asm, pattern, d> { + let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm); +} + // SSE1 Instruction Templates: // // SSI - SSE1 instructions with XS prefix. // PSI - SSE1 instructions with TB prefix. // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix. +// VSSI - SSE1 instructions with XS prefix in AVX form. +// VPSI - SSE1 instructions with TB prefix in AVX form. 
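The let statements above place the four new AVX bits at TSFlags positions 32-35, just past the opcode byte in bits 31-24; since those positions no longer fit in 32 bits, TSFlags has to be read as a 64-bit quantity from here on. A hedged C++ sketch of unpacking that layout on the consumer side (the shift constants mirror the assignments shown here; the helper names are mine, and the authoritative decoding lives in X86InstrInfo.h, which the comment says must stay in sync):

#include <cassert>
#include <cstdint>

// Bit positions copied from the TSFlags layout above.
enum : unsigned {
  OpcodeShift    = 24,  // TSFlags{31-24}
  VEXShift       = 32,  // hasVEXPrefix
  VEX_WShift     = 33,  // hasVEX_WPrefix
  VEX_4VShift    = 34,  // hasVEX_4VPrefix
  VEX_I8IMMShift = 35   // hasVEX_i8ImmReg
};

static bool hasVEXPrefix(uint64_t TSFlags) { return (TSFlags >> VEXShift) & 1; }
static bool hasVEX_4V(uint64_t TSFlags)    { return (TSFlags >> VEX_4VShift) & 1; }
static uint8_t opcode(uint64_t TSFlags)    { return (TSFlags >> OpcodeShift) & 0xFF; }

int main() {
  // e.g. a VEX.4V-encoded instruction with opcode byte 0x58
  uint64_t Flags = (0x58ull << OpcodeShift) | (1ull << VEXShift) | (1ull << VEX_4VShift);
  assert(hasVEXPrefix(Flags) && hasVEX_4V(Flags) && opcode(Flags) == 0x58);
  return 0;
}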
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; @@ -229,6 +295,14 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, Requires<[HasSSE1]>; +class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, + Requires<[HasAVX]>; +class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, + Requires<[HasAVX]>; // SSE2 Instruction Templates: // @@ -237,6 +311,8 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. +// VSDI - SSE2 instructions with XD prefix in AVX form. +// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>; @@ -253,6 +329,14 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE2]>; +class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, + Requires<[HasAVX]>; +class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, + OpSize, Requires<[HasAVX]>; // SSE3 Instruction Templates: // diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 6b9478d..71c4e8b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -60,3 +60,339 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); }], MMX_SHUFFLE_get_shuf_imm>; + +//===----------------------------------------------------------------------===// +// SSE specific DAG Nodes. 
+//===----------------------------------------------------------------------===// + +def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, + SDTCisFP<0>, SDTCisInt<2> ]>; +def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, + SDTCisFP<1>, SDTCisVT<3, i8>]>; + +def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; +def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; +def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; +def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; +def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; +def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86pshufb : SDNode<"X86ISD::PSHUFB", + SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86pextrb : SDNode<"X86ISD::PEXTRB", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pextrw : SDNode<"X86ISD::PEXTRW", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pinsrb : SDNode<"X86ISD::PINSRB", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86pinsrw : SDNode<"X86ISD::PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86insrtps : SDNode<"X86ISD::INSERTPS", + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, + SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; +def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", + SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; +def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, + [SDNPHasChain, SDNPMayLoad]>; +def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; +def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; +def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; +def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>; +def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>; +def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; +def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; +def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; + +def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, v4f32>, + SDTCisVT<2, v4f32>]>; +def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; + +//===----------------------------------------------------------------------===// +// SSE Complex Patterns +//===----------------------------------------------------------------------===// + +// These are 'extloads' from a scalar to the low element of a vector, zeroing +// the top elements. These are used for the SSE 'ss' and 'sd' instruction +// forms. 
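The sse_load_f32/sse_load_f64 patterns defined next match these scalar 'extloads': one element is read from memory into the low lane of the vector and the remaining lanes are zeroed, which is the memory form of the 'ss'/'sd' instructions. A scalar C++ model of the value semantics (the function and types are illustrative only, not LLVM API):

#include <array>
#include <cassert>

// Models the SSE scalar extload: load one float into lane 0 of a
// 4-lane vector and zero the upper three lanes.
static std::array<float, 4> scalarExtload(const float *Mem) {
  return {Mem[0], 0.0f, 0.0f, 0.0f};
}

int main() {
  float X = 3.5f;
  std::array<float, 4> V = scalarExtload(&X);
  assert(V[0] == 3.5f && V[1] == 0.0f && V[2] == 0.0f && V[3] == 0.0f);
  return 0;
}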
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [], + [SDNPHasChain, SDNPMayLoad]>; +def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [], + [SDNPHasChain, SDNPMayLoad]>; + +def ssmem : Operand<v4f32> { + let PrintMethod = "printf32mem"; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} +def sdmem : Operand<v2f64> { + let PrintMethod = "printf64mem"; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + +//===----------------------------------------------------------------------===// +// SSE pattern fragments +//===----------------------------------------------------------------------===// + +def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; +def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; +def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; +def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; +def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; + +// Like 'store', but always requires vector alignment. +def alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 16; +}]>; + +// Like 'load', but always requires vector alignment. +def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 16; +}]>; + +def alignedloadfsf32 : PatFrag<(ops node:$ptr), + (f32 (alignedload node:$ptr))>; +def alignedloadfsf64 : PatFrag<(ops node:$ptr), + (f64 (alignedload node:$ptr))>; +def alignedloadv4f32 : PatFrag<(ops node:$ptr), + (v4f32 (alignedload node:$ptr))>; +def alignedloadv2f64 : PatFrag<(ops node:$ptr), + (v2f64 (alignedload node:$ptr))>; +def alignedloadv4i32 : PatFrag<(ops node:$ptr), + (v4i32 (alignedload node:$ptr))>; +def alignedloadv2i64 : PatFrag<(ops node:$ptr), + (v2i64 (alignedload node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def alignedloadv8f32 : PatFrag<(ops node:$ptr), + (v8f32 (alignedload node:$ptr))>; +def alignedloadv4f64 : PatFrag<(ops node:$ptr), + (v4f64 (alignedload node:$ptr))>; +def alignedloadv8i32 : PatFrag<(ops node:$ptr), + (v8i32 (alignedload node:$ptr))>; +def alignedloadv4i64 : PatFrag<(ops node:$ptr), + (v4i64 (alignedload node:$ptr))>; + +// Like 'load', but uses special alignment checks suitable for use in +// memory operands in most SSE instructions, which are required to +// be naturally aligned on some targets but not on others. If the subtarget +// allows unaligned accesses, match any load, though this may require +// setting a feature bit in the processor (on startup, for example). +// Opteron 10h and later implement such a feature. 
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return Subtarget->hasVectorUAMem() + || cast<LoadSDNode>(N)->getAlignment() >= 16; +}]>; + +def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; +def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; +def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; +def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; +def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; +def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; +def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; +def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; + +// SSSE3 uses MMX registers for some instructions. They aren't aligned on a +// 16-byte boundary. +// FIXME: 8 byte alignment for mmx reads is not required +def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 8; +}]>; + +def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>; +def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; +def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; +def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; + +// MOVNT Support +// Like 'store', but requires the non-temporal bit to be set +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal(); + return false; +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getAlignment() >= 16; + return false; +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && + ST->getAlignment() < 16; + return false; +}]>; + +def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; +def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; +def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; +def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; +def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; +def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; + +def vzmovl_v2i64 : PatFrag<(ops node:$src), + (bitconvert (v2i64 (X86vzmovl + (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; +def vzmovl_v4i32 : PatFrag<(ops node:$src), + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 node:$src))))))>; + +def vzload_v2i64 : PatFrag<(ops node:$src), + (bitconvert (v2i64 (X86vzload node:$src)))>; + + +def fp32imm0 : PatLeaf<(f32 fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + +// BYTE_imm - Transform bit immediates into byte immediates. +def BYTE_imm : SDNodeXForm<imm, [{ + // Transformation function: imm >> 3 + return getI32Imm(N->getZExtValue() >> 3); +}]>; + +// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, +// SHUFP* etc. imm. 
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShuffleSHUFImmediate(N)); +}]>; + +// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to +// PSHUFHW imm. +def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); +}]>; + +// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to +// PSHUFLW imm. +def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); +}]>; + +// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to +// a PALIGNR imm. +def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePALIGNRImmediate(N)); +}]>; + +def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return SVOp->isSplat() && SVOp->getSplatIndex() == 0; +}]>; + +def movddup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movhlps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movlhps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movlp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movshdup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movsldup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def pshufd : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_shuf_imm>; + +def shufp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_shuf_imm>; + +def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_pshufhw_imm>; + +def pshuflw : PatFrag<(ops node:$lhs, 
node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_pshuflw_imm>; + +def palign : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_palign_imm>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 34e12ca..ce471ea 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -784,7 +784,9 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOV8rm: case X86::MOV16rm: case X86::MOV32rm: + case X86::MOV32rm_TC: case X86::MOV64rm: + case X86::MOV64rm_TC: case X86::LD_Fp64m: case X86::MOVSSrm: case X86::MOVSDrm: @@ -805,7 +807,9 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOV8mr: case X86::MOV16mr: case X86::MOV32mr: + case X86::MOV32mr_TC: case X86::MOV64mr: + case X86::MOV64mr_TC: case X86::ST_FpP64m: case X86::MOVSSmr: case X86::MOVSDmr: @@ -863,7 +867,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameStoreOpcode(MI->getOpcode())) if (isFrameOperand(MI, 0, FrameIndex)) - return MI->getOperand(X86AddrNumOperands).getReg(); + return MI->getOperand(X86::AddrNumOperands).getReg(); return 0; } @@ -1064,14 +1068,9 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo &TRI) const { DebugLoc DL = Orig->getDebugLoc(); - if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = TRI->getSubReg(DestReg, SubIdx); - SubIdx = 0; - } - // MOV32r0 etc. are implemented with xor which clobbers condition code. // Re-materialize them as movri instructions to avoid side effects. bool Clone = true; @@ -1098,14 +1097,13 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, if (Clone) { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); MBB.insert(I, MI); } else { - BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); + BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0); } MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); + NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); } /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that @@ -1151,10 +1149,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // least on modern x86 machines). 
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); MachineInstr *InsMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) - .addReg(leaInReg) - .addReg(Src, getKillRegState(isKill)) - .addImm(X86::sub_16bit); + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) + .addReg(leaInReg, RegState::Define, X86::sub_16bit) + .addReg(Src, getKillRegState(isKill)); MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); @@ -1165,20 +1162,20 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) - .addReg(leaInReg, RegState::Kill).addImm(0); + .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0); break; } case X86::INC16r: case X86::INC64_16r: - addLeaRegOffset(MIB, leaInReg, true, 1); + addRegOffset(MIB, leaInReg, true, 1); break; case X86::DEC16r: case X86::DEC64_16r: - addLeaRegOffset(MIB, leaInReg, true, -1); + addRegOffset(MIB, leaInReg, true, -1); break; case X86::ADD16ri: case X86::ADD16ri8: - addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); + addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); break; case X86::ADD16rr: { unsigned Src2 = MI->getOperand(2).getReg(); @@ -1195,10 +1192,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // well be shifting and then extracting the lower 16-bits. BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); InsMI2 = - BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) - .addReg(leaInReg2) - .addReg(Src2, getKillRegState(isKill2)) - .addImm(X86::sub_16bit); + BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) + .addReg(leaInReg2, RegState::Define, X86::sub_16bit) + .addReg(Src2, getKillRegState(isKill2)); addRegReg(MIB, leaInReg, true, leaInReg2, true); } if (LV && isKill2 && InsMI2) @@ -1209,10 +1205,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstr *NewMI = MIB; MachineInstr *ExtMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(leaOutReg, RegState::Kill) - .addImm(X86::sub_16bit); + .addReg(leaOutReg, RegState::Kill, X86::sub_16bit); if (LV) { // Update live variables @@ -1283,7 +1278,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) .addReg(Src, getKillRegState(isKill)) - .addImm(0); + .addImm(0).addReg(0); break; } case X86::SHL32ri: { @@ -1297,7 +1292,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)).addImm(0); + .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0); break; } case X86::SHL16ri: { @@ -1313,7 +1308,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) .addReg(Src, getKillRegState(isKill)) - .addImm(0); + .addImm(0).addReg(0); break; } default: { @@ -1331,7 +1326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? 
X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, 1); @@ -1353,7 +1348,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, -1); @@ -1401,7 +1396,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32: case X86::ADD64ri8: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1410,7 +1405,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD32ri8: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1421,7 +1416,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1845,9 +1840,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -1856,7 +1850,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1866,27 +1860,27 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. 
- BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { unsigned Opc = GetCondBranchFromCond(CC); - BuildMI(&MBB, dl, get(Opc)).addMBB(TBB); + BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); ++Count; } } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); + BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1897,237 +1891,153 @@ static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); } -bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - // Determine if DstRC and SrcRC have a common superclass in common. - const TargetRegisterClass *CommonRC = DestRC; - if (DestRC == SrcRC) - /* Source and destination have the same register class. */; - else if (CommonRC->hasSuperClass(SrcRC)) - CommonRC = SrcRC; - else if (!DestRC->hasSubClass(SrcRC)) { - // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy them as GR64. Similarly, for GR32_NOREX and - // GR32_NOSP, copy as GR32. - if (SrcRC->hasSuperClass(&X86::GR64RegClass) && - DestRC->hasSuperClass(&X86::GR64RegClass)) - CommonRC = &X86::GR64RegClass; - else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && - DestRC->hasSuperClass(&X86::GR32RegClass)) - CommonRC = &X86::GR32RegClass; +void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + // First deal with the normal symmetric copies. + unsigned Opc = 0; + if (X86::GR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV64rr; + else if (X86::GR32RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV32rr; + else if (X86::GR16RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV16rr; + else if (X86::GR8RegClass.contains(DestReg, SrcReg)) { + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. + if ((isHReg(DestReg) || isHReg(SrcReg)) && + TM.getSubtarget<X86Subtarget>().is64Bit()) + Opc = X86::MOV8rr_NOREX; else - CommonRC = 0; - } - - if (CommonRC) { - unsigned Opc; - if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32RegClass || - CommonRC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16RegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8RegClass) { - // Copying to or from a physical H register on x86-64 requires a NOREX - // move. Otherwise use a normal move. 
- if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rr_NOREX; - else - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rr_NOREX; - else - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_NOREXRegClass || - CommonRC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64rr_TC; - } else if (CommonRC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32rr_TC; - } else if (CommonRC == &X86::RFP32RegClass) { - Opc = X86::MOV_Fp3232; - } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { - Opc = X86::MOV_Fp6464; - } else if (CommonRC == &X86::RFP80RegClass) { - Opc = X86::MOV_Fp8080; - } else if (CommonRC == &X86::FR32RegClass) { - Opc = X86::FsMOVAPSrr; - } else if (CommonRC == &X86::FR64RegClass) { - Opc = X86::FsMOVAPDrr; - } else if (CommonRC == &X86::VR128RegClass) { - Opc = X86::MOVAPSrr; - } else if (CommonRC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64rr; - } else { - return false; - } - BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); - return true; + } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOVAPSrr; + else if (X86::VR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MMX_MOVQ64rr; + + if (Opc) { + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } // Moving EFLAGS to / from another register requires a push and a pop. - if (SrcRC == &X86::CCRRegClass) { - if (SrcReg != X86::EFLAGS) - return false; - if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { + if (SrcReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); - return true; - } else if (DestRC == &X86::GR32RegClass || - DestRC == &X86::GR32_NOSPRegClass) { + return; + } else if (X86::GR32RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); - return true; + return; } - } else if (DestRC == &X86::CCRRegClass) { - if (DestReg != X86::EFLAGS) - return false; - if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); + } + if (DestReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH64r)) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF64)); - return true; - } else if (SrcRC == &X86::GR32RegClass || - DestRC == &X86::GR32_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); + return; + } else if (X86::GR32RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH32r)) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF32)); - return true; - } - } - - // Moving from ST(0) turns into FpGET_ST0_32 etc. 
- if (SrcRC == &X86::RSTRegClass) { - // Copying from ST(0)/ST(1). - if (SrcReg != X86::ST0 && SrcReg != X86::ST1) - // Can only copy from ST(0)/ST(1) right now - return false; - bool isST0 = SrcReg == X86::ST0; - unsigned Opc; - if (DestRC == &X86::RFP32RegClass) - Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; - else if (DestRC == &X86::RFP64RegClass) - Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; - else { - if (DestRC != &X86::RFP80RegClass) - return false; - Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; + return; } - BuildMI(MBB, MI, DL, get(Opc), DestReg); - return true; } - // Moving to ST(0) turns into FpSET_ST0_32 etc. - if (DestRC == &X86::RSTRegClass) { - // Copying to ST(0) / ST(1). - if (DestReg != X86::ST0 && DestReg != X86::ST1) - // Can only copy to TOS right now - return false; - bool isST0 = DestReg == X86::ST0; - unsigned Opc; - if (SrcRC == &X86::RFP32RegClass) - Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; - else if (SrcRC == &X86::RFP64RegClass) - Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; - else { - if (SrcRC != &X86::RFP80RegClass) - return false; - Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; - } - BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); - return true; - } - - // Not yet supported! - return false; + DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) + << " to " << RI.getName(DestReg) << '\n'); + llvm_unreachable("Cannot emit physreg copy instruction"); } -static unsigned getStoreRegOpcode(unsigned SrcReg, - const TargetRegisterClass *RC, - bool isStackAligned, - TargetMachine &TM) { - unsigned Opc = 0; - if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16RegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8RegClass) { +static unsigned getLoadStoreRegOpcode(unsigned Reg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM, + bool load) { + switch (RC->getID()) { + default: + llvm_unreachable("Unknown regclass"); + case X86::GR64RegClassID: + case X86::GR64_NOSPRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32RegClassID: + case X86::GR32_NOSPRegClassID: + case X86::GR32_ADRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16RegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8RegClassID: // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. - if (isHReg(SrcReg) && + if (isHReg(Reg) && TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8mr_NOREX; + return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_ABCD_LRegClass) { - Opc = X86::MOV8mr; - } else if (RC == &X86::GR8_ABCD_HRegClass) { + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_ABCDRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32_ABCDRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16_ABCDRegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8_ABCD_LRegClassID: + return load ? 
X86::MOV8rm :X86::MOV8mr; + case X86::GR8_ABCD_HRegClassID: if (TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8mr_NOREX; + return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_NOREXRegClass || - RC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64mr_TC; - } else if (RC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32mr_TC; - } else if (RC == &X86::RFP80RegClass) { - Opc = X86::ST_FpP80m; // pops - } else if (RC == &X86::RFP64RegClass) { - Opc = X86::ST_Fp64m; - } else if (RC == &X86::RFP32RegClass) { - Opc = X86::ST_Fp32m; - } else if (RC == &X86::FR32RegClass) { - Opc = X86::MOVSSmr; - } else if (RC == &X86::FR64RegClass) { - Opc = X86::MOVSDmr; - } else if (RC == &X86::VR128RegClass) { + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_NOREXRegClassID: + case X86::GR64_NOREX_NOSPRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32_NOREXRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16_NOREXRegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8_NOREXRegClassID: + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_TCRegClassID: + return load ? X86::MOV64rm_TC : X86::MOV64mr_TC; + case X86::GR32_TCRegClassID: + return load ? X86::MOV32rm_TC : X86::MOV32mr_TC; + case X86::RFP80RegClassID: + return load ? X86::LD_Fp80m : X86::ST_FpP80m; + case X86::RFP64RegClassID: + return load ? X86::LD_Fp64m : X86::ST_Fp64m; + case X86::RFP32RegClassID: + return load ? X86::LD_Fp32m : X86::ST_Fp32m; + case X86::FR32RegClassID: + return load ? X86::MOVSSrm : X86::MOVSSmr; + case X86::FR64RegClassID: + return load ? X86::MOVSDrm : X86::MOVSDmr; + case X86::VR128RegClassID: // If stack is realigned we can use aligned stores. - Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; - } else if (RC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64mr; - } else { - llvm_unreachable("Unknown regclass"); + if (isStackAligned) + return load ? X86::MOVAPSrm : X86::MOVAPSmr; + else + return load ? X86::MOVUPSrm : X86::MOVUPSmr; + case X86::VR64RegClassID: + return load ? 
X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; } +} + +static unsigned getStoreRegOpcode(unsigned SrcReg, + const TargetRegisterClass *RC, + bool isStackAligned, + TargetMachine &TM) { + return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); +} - return Opc; + +static unsigned getLoadRegOpcode(unsigned DestReg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM) { + return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); } void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -2150,7 +2060,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -2161,72 +2071,6 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, NewMIs.push_back(MIB); } -static unsigned getLoadRegOpcode(unsigned DestReg, - const TargetRegisterClass *RC, - bool isStackAligned, - const TargetMachine &TM) { - unsigned Opc = 0; - if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16RegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8RegClass) { - // Copying to or from a physical H register on x86-64 requires a NOREX - // move. Otherwise use a normal move. - if (isHReg(DestReg) && - TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rm_NOREX; - else - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_ABCD_LRegClass) { - Opc = X86::MOV8rm; - } else if (RC == &X86::GR8_ABCD_HRegClass) { - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rm_NOREX; - else - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_NOREXRegClass || - RC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64rm_TC; - } else if (RC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32rm_TC; - } else if (RC == &X86::RFP80RegClass) { - Opc = X86::LD_Fp80m; - } else if (RC == &X86::RFP64RegClass) { - Opc = X86::LD_Fp64m; - } else if (RC == &X86::RFP32RegClass) { - Opc = X86::LD_Fp32m; - } else if (RC == &X86::FR32RegClass) { - Opc = X86::MOVSSrm; - } else if (RC == &X86::FR64RegClass) { - Opc = X86::MOVSDrm; - } else if (RC == &X86::VR128RegClass) { - // If stack is realigned we can use aligned loads. - Opc = isStackAligned ? 
@@ -2161,72 +2071,6 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
   NewMIs.push_back(MIB);
 }
 
-static unsigned getLoadRegOpcode(unsigned DestReg,
-                                 const TargetRegisterClass *RC,
-                                 bool isStackAligned,
-                                 const TargetMachine &TM) {
-  unsigned Opc = 0;
-  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
-    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
-    Opc = X86::MOV32rm;
-  } else if (RC == &X86::GR16RegClass) {
-    Opc = X86::MOV16rm;
-  } else if (RC == &X86::GR8RegClass) {
-    // Copying to or from a physical H register on x86-64 requires a NOREX
-    // move.  Otherwise use a normal move.
-    if (isHReg(DestReg) &&
-        TM.getSubtarget<X86Subtarget>().is64Bit())
-      Opc = X86::MOV8rm_NOREX;
-    else
-      Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR64_ABCDRegClass) {
-    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32_ABCDRegClass) {
-    Opc = X86::MOV32rm;
-  } else if (RC == &X86::GR16_ABCDRegClass) {
-    Opc = X86::MOV16rm;
-  } else if (RC == &X86::GR8_ABCD_LRegClass) {
-    Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR8_ABCD_HRegClass) {
-    if (TM.getSubtarget<X86Subtarget>().is64Bit())
-      Opc = X86::MOV8rm_NOREX;
-    else
-      Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR64_NOREXRegClass ||
-             RC == &X86::GR64_NOREX_NOSPRegClass) {
-    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32_NOREXRegClass) {
-    Opc = X86::MOV32rm;
-  } else if (RC == &X86::GR16_NOREXRegClass) {
-    Opc = X86::MOV16rm;
-  } else if (RC == &X86::GR8_NOREXRegClass) {
-    Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR64_TCRegClass) {
-    Opc = X86::MOV64rm_TC;
-  } else if (RC == &X86::GR32_TCRegClass) {
-    Opc = X86::MOV32rm_TC;
-  } else if (RC == &X86::RFP80RegClass) {
-    Opc = X86::LD_Fp80m;
-  } else if (RC == &X86::RFP64RegClass) {
-    Opc = X86::LD_Fp64m;
-  } else if (RC == &X86::RFP32RegClass) {
-    Opc = X86::LD_Fp32m;
-  } else if (RC == &X86::FR32RegClass) {
-    Opc = X86::MOVSSrm;
-  } else if (RC == &X86::FR64RegClass) {
-    Opc = X86::MOVSDrm;
-  } else if (RC == &X86::VR128RegClass) {
-    // If stack is realigned we can use aligned loads.
-    Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
-  } else if (RC == &X86::VR64RegClass) {
-    Opc = X86::MMX_MOVQ64rm;
-  } else {
-    llvm_unreachable("Unknown regclass");
-  }
-
-  return Opc;
-}
 
 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
@@ -2246,7 +2090,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                  MachineInstr::mmo_iterator MMOBegin,
                                  MachineInstr::mmo_iterator MMOEnd,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+  bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
   DebugLoc DL;
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2277,18 +2121,17 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
-    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
     if (Reg == FPReg)
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
       continue;
-    if (RegClass != &X86::VR128RegClass && !isWin64) {
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
       CalleeFrameSize += SlotSize;
       BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
     } else {
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
-                          &RI);
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+                          &X86::VR128RegClass, &RI);
     }
   }
 
@@ -2315,11 +2158,11 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     if (Reg == FPReg)
       // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
       continue;
-    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
-    if (RegClass != &X86::VR128RegClass && !isWin64) {
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
       BuildMI(MBB, MI, DL, get(Opc), Reg);
     } else {
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                           &X86::VR128RegClass, &RI);
     }
   }
   return true;
@@ -2492,7 +2335,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   }
 
   // No fusion
-  if (PrintFailedFusing)
+  if (PrintFailedFusing && !MI->isCopy())
     dbgs() << "We failed to fuse operand " << i << " in " << *MI;
   return NULL;
 }
@@ -2610,7 +2453,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   } else if (Ops.size() != 1)
     return NULL;
 
-  SmallVector<MachineOperand,X86AddrNumOperands> MOs;
+  SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
   switch (LoadMI->getOpcode()) {
   case X86::V_SET0PS:
   case X86::V_SET0PD:
@@ -2632,7 +2475,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       PICBase = X86::RIP;
     else
-      // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+      // FIXME: PICBase = getGlobalBaseReg(&MF);
       // This doesn't work for several reasons.
       // 1. GlobalBaseReg may have been spilled.
       // 2. It may not be live at MI.
@@ -2664,7 +2507,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   default: {
     // Folding a normal load.  Just copy the load's address operands.
     unsigned NumOps = LoadMI->getDesc().getNumOperands();
-    for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
+    for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
       MOs.push_back(LoadMI->getOperand(i));
     break;
   }
@@ -2727,7 +2570,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
     if (I != OpcodeTablePtr->end())
       return true;
   }
-  return false;
+  return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
 }
 
 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -2751,13 +2594,20 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   const TargetInstrDesc &TID = get(Opc);
   const TargetOperandInfo &TOI = TID.OpInfo[Index];
   const TargetRegisterClass *RC = TOI.getRegClass(&RI);
-  SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
+  if (!MI->hasOneMemOperand() &&
+      RC == &X86::VR128RegClass &&
+      !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+    // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
+    // conservatively assume the address is unaligned. That's bad for
+    // performance.
+    return false;
+  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
   SmallVector<MachineOperand,2> BeforeOps;
   SmallVector<MachineOperand,2> AfterOps;
   SmallVector<MachineOperand,4> ImpOps;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &Op = MI->getOperand(i);
-    if (i >= Index && i < Index + X86AddrNumOperands)
+    if (i >= Index && i < Index + X86::AddrNumOperands)
       AddrOps.push_back(Op);
     else if (Op.isReg() && Op.isImplicit())
       ImpOps.push_back(Op);
@@ -2776,7 +2626,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
     loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
   if (UnfoldStore) {
     // Address operands cannot be marked isKill.
-    for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
+    for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
       MachineOperand &MO = NewMIs[0]->getOperand(i);
       if (MO.isReg())
         MO.setIsKill(false);
@@ -2873,7 +2723,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   unsigned NumOps = N->getNumOperands();
   for (unsigned i = 0; i != NumOps-1; ++i) {
     SDValue Op = N->getOperand(i);
-    if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
+    if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
       AddrOps.push_back(Op);
     else if (i < Index-NumDefs)
       BeforeOps.push_back(Op);
@@ -2892,7 +2742,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
            MachineInstr::mmo_iterator> MMOs =
       MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                             cast<MachineSDNode>(N)->memoperands_end());
-    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+    if (!(*MMOs.first) &&
+        RC == &X86::VR128RegClass &&
+        !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+      // Do not introduce a slow unaligned load.
+      return false;
+    bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
     Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
                               VT, MVT::Other, &AddrOps[0], AddrOps.size());
     NewNodes.push_back(Load);
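
The new bail-outs here all apply one policy: use the aligned vector opcode when 16-byte alignment is proven, the unaligned one when the subtarget handles it cheaply, and refuse to unfold at all when alignment is unknown and unaligned access is slow. A hedged standalone sketch of that decision, with illustrative names:

    enum VecLoadOpc { MOVAPS /* aligned */, MOVUPS /* unaligned */ };

    // KnownAlign of 0 means "no memoperand, alignment unknown".
    // Returns false when the transformation should be refused outright.
    static bool chooseVectorLoad(unsigned KnownAlign, bool UnalignedIsFast,
                                 VecLoadOpc &Opc) {
      if (KnownAlign >= 16) { Opc = MOVAPS; return true; }
      if (UnalignedIsFast)  { Opc = MOVUPS; return true; }
      return false; // keep the value in a register instead
    }
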
@@ -2929,7 +2784,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
            MachineInstr::mmo_iterator> MMOs =
       MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                              cast<MachineSDNode>(N)->memoperands_end());
-    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+    if (!(*MMOs.first) &&
+        RC == &X86::VR128RegClass &&
+        !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+      // Do not introduce a slow unaligned store.
+      return false;
+    bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
     SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                          isAligned, TM),
                                        dl, MVT::Other,
@@ -3065,16 +2925,16 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
 
   EVT VT = Load1->getValueType(0);
   switch (VT.getSimpleVT().SimpleTy) {
-  default: {
+  default:
     // XMM registers. In 64-bit mode we can be a bit more aggressive since we
     // have 16 of them to play with.
     if (TM.getSubtargetImpl()->is64Bit()) {
       if (NumLoads >= 3)
         return false;
-    } else if (NumLoads)
+    } else if (NumLoads) {
       return false;
+    }
     break;
-  }
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
@@ -3083,6 +2943,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
   case MVT::f64:
     if (NumLoads)
       return false;
+    break;
   }
 
   return true;
@@ -3123,6 +2984,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
   case X86::R12B:  case X86::R13B:  case X86::R14B:  case X86::R15B:
   case X86::XMM8:  case X86::XMM9:  case X86::XMM10: case X86::XMM11:
   case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+  case X86::YMM8:  case X86::YMM9:  case X86::YMM10: case X86::YMM11:
+  case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
     return true;
   }
   return false;
@@ -3194,7 +3057,7 @@ unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
     case X86II::MRM4m: case X86II::MRM5m:
     case X86II::MRM6m: case X86II::MRM7m:
     case X86II::MRMDestMem: {
-      unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+      unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
       i = isTwoAddr ? 1 : 0;
       if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
         REX |= 1 << 2;
@@ -3546,7 +3409,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::MRMDestMem: {
     ++FinalSize;
     FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
-    CurOp += X86AddrNumOperands + 1;
+    CurOp += X86::AddrNumOperands + 1;
     if (CurOp != NumOps) {
       ++CurOp;
       FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3565,16 +3428,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     break;
 
   case X86II::MRMSrcMem: {
-    int AddrOperands;
-    if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
-        Opcode == X86::LEA16r || Opcode == X86::LEA32r)
-      AddrOperands = X86AddrNumOperands - 1; // No segment register
-    else
-      AddrOperands = X86AddrNumOperands;
-
     ++FinalSize;
     FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
-    CurOp += AddrOperands + 1;
+    CurOp += X86::AddrNumOperands + 1;
     if (CurOp != NumOps) {
       ++CurOp;
       FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3628,7 +3484,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
 
     ++FinalSize;
     FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
-    CurOp += X86AddrNumOperands;
+    CurOp += X86::AddrNumOperands;
 
     if (CurOp != NumOps) {
       const MachineOperand &MO = MI.getOperand(CurOp++);
@@ -3694,6 +3550,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
 /// the global base register value. Output instructions required to
 /// initialize the register in the function entry block, if necessary.
 ///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
          "X86-64 PIC uses RIP relative addressing");
@@ -3703,30 +3561,10 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   if (GlobalBaseReg != 0)
     return GlobalBaseReg;
 
-  // Insert the set of GlobalBaseReg into the first MBB of the function
-  MachineBasicBlock &FirstMBB = MF->front();
-  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
-  DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+  // Create the register. The code to initialize it is inserted
+  // later, by the CGBR pass (below).
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
-
-  const TargetInstrInfo *TII = TM.getInstrInfo();
-  // Operand of MovePCtoStack is completely ignored by asm printer. It's
-  // only used in JIT code emission as displacement to pc.
-  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
-  // If we're using vanilla 'GOT' PIC style, we should use relative addressing
-  // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
-  if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
-    GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
-    // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
-    BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
-      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
-                                    X86II::MO_GOT_ABSOLUTE_ADDRESS);
-  } else {
-    GlobalBaseReg = PC;
-  }
-
+  GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
   X86FI->setGlobalBaseReg(GlobalBaseReg);
   return GlobalBaseReg;
 }
@@ -3784,3 +3622,65 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
   NopInst.setOpcode(X86::NOOP);
 }
 
+namespace {
+  /// CGBR - Create Global Base Reg pass. This initializes the PIC
+  /// global base register for x86-32.
+  struct CGBR : public MachineFunctionPass {
+    static char ID;
+    CGBR() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      const X86TargetMachine *TM =
+        static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+      assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+             "X86-64 PIC uses RIP relative addressing");
+
+      // Only emit a global base reg in PIC mode.
+      if (TM->getRelocationModel() != Reloc::PIC_)
+        return false;
+
+      // Insert the set of GlobalBaseReg into the first MBB of the function
+      MachineBasicBlock &FirstMBB = MF.front();
+      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+      MachineRegisterInfo &RegInfo = MF.getRegInfo();
+      const X86InstrInfo *TII = TM->getInstrInfo();
+
+      unsigned PC;
+      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+        PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+      else
+        PC = TII->getGlobalBaseReg(&MF);
+
+      // Operand of MovePCtoStack is completely ignored by asm printer. It's
+      // only used in JIT code emission as displacement to pc.
+      BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+      // If we're using vanilla 'GOT' PIC style, we should use relative
+      // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+        unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
+        // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+          .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+                                        X86II::MO_GOT_ABSOLUTE_ADDRESS);
+      }
+
+      return true;
+    }
+
+    virtual const char *getPassName() const {
+      return "X86 PIC Global Base Reg Initialization";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }
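
After this change, getGlobalBaseReg only reserves a virtual register and records it in X86MachineFunctionInfo; the defining MOVPC32r/ADD32ri sequence is emitted once per function by the CGBR pass above. A minimal sketch of the create-now, define-later idiom, with invented names rather than the LLVM classes:

    // Create-on-demand, define-later: the getter hands out a stable id;
    // a separate pass emits the defining code exactly once per function.
    struct FunctionInfo {
      unsigned GlobalBaseReg;
      FunctionInfo() : GlobalBaseReg(0) {}       // 0 = "not allocated yet"
    };
    struct RegAlloc {
      unsigned Next;
      RegAlloc() : Next(1) {}
      unsigned create() { return Next++; }
    };

    unsigned getGlobalBaseReg(FunctionInfo &FI, RegAlloc &RA) {
      if (FI.GlobalBaseReg == 0)
        FI.GlobalBaseReg = RA.create();          // no code emitted here...
      return FI.GlobalBaseReg;                   // ...CGBR initializes it later
    }
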
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 62d7c74..f762b58 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -24,6 +24,24 @@ namespace llvm {
   class X86TargetMachine;
 
 namespace X86 {
+  // Enums for memory operand decoding.  Each memory operand is represented with
+  // a 5 operand sequence in the form:
+  //   [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+  // These enums help decode this.
+  enum {
+    AddrBaseReg = 0,
+    AddrScaleAmt = 1,
+    AddrIndexReg = 2,
+    AddrDisp = 3,
+
+    /// AddrSegmentReg - The operand # of the segment in the memory operand.
+    AddrSegmentReg = 4,
+
+    /// AddrNumOperands - Total number of operands in a memory reference.
+    AddrNumOperands = 5
+  };
+
   // X86 specific condition code. These correspond to X86_*_COND in
   // X86InstrInfo.td. They must be kept in synch.
   enum CondCode {
@@ -173,7 +191,19 @@ namespace X86II {
     /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
     /// which is a PIC-base-relative reference to a hidden dyld lazy pointer
     /// stub.
-    MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE
+    MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+    /// MO_TLVP - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// This is the TLS offset for the Darwin TLS mechanism.
+    MO_TLVP,
+
+    /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+    /// is some TLS offset from the picbase.
+    ///
+    /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+    MO_TLVP_PIC_BASE
   };
 }
 
@@ -203,6 +233,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
   case X86II::MO_PIC_BASE_OFFSET:                // Darwin local global.
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:        // Darwin/32 external global.
   case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+  case X86II::MO_TLVP:                           // ??? Pretty sure..
    return true;
   default:
     return false;
@@ -347,9 +378,10 @@ namespace X86II {
     Imm8 = 1 << ImmShift,
     Imm8PCRel = 2 << ImmShift,
     Imm16 = 3 << ImmShift,
-    Imm32 = 4 << ImmShift,
-    Imm32PCRel = 5 << ImmShift,
-    Imm64 = 6 << ImmShift,
+    Imm16PCRel = 4 << ImmShift,
+    Imm32 = 5 << ImmShift,
+    Imm32PCRel = 6 << ImmShift,
+    Imm64 = 7 << ImmShift,
 
     //===------------------------------------------------------------------===//
     // FP Instruction Classification...  Zero is non-fp instruction.
@@ -403,28 +435,47 @@ namespace X86II {
     SSEDomainShift = 22,
 
     OpcodeShift   = 24,
-    OpcodeMask    = 0xFF << OpcodeShift
+    OpcodeMask    = 0xFF << OpcodeShift,
+
+    //===------------------------------------------------------------------===//
+    // VEX - The opcode prefix used by AVX instructions
+    VEX         = 1ULL << 32,
+
+    // VEX_W - Has a opcode specific functionality, but is used in the same
+    // way as REX_W is for regular SSE instructions.
+    VEX_W       = 1ULL << 33,
+
+    // VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+    // address instructions in SSE are represented as 3 address ones in AVX
+    // and the additional register is encoded in VEX_VVVV prefix.
+    VEX_4V      = 1ULL << 34,
+
+    // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
+    // must be encoded in the i8 immediate field. This usually happens in
+    // instructions with 4 operands.
+    VEX_I8IMM   = 1ULL << 35
   };
 
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
   // specified machine instruction.
   //
-  static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) {
+  static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
    return TSFlags >> X86II::OpcodeShift;
   }
 
-  static inline bool hasImm(unsigned TSFlags) {
+  static inline bool hasImm(uint64_t TSFlags) {
     return (TSFlags & X86II::ImmMask) != 0;
   }
 
   /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
   /// of the specified instruction.
-  static inline unsigned getSizeOfImm(unsigned TSFlags) {
+  static inline unsigned getSizeOfImm(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
     default: assert(0 && "Unknown immediate size");
     case X86II::Imm8:
     case X86II::Imm8PCRel:  return 1;
-    case X86II::Imm16:      return 2;
+    case X86II::Imm16:
+    case X86II::Imm16PCRel: return 2;
     case X86II::Imm32:
     case X86II::Imm32PCRel: return 4;
     case X86II::Imm64:      return 8;
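
The VEX bits introduced above live at bit 32 and higher, which is why every TSFlags accessor in these hunks widens from unsigned to uint64_t; with a 32-bit parameter, a test such as TSFlags & VEX_4V would truncate the flags and always be false. A standalone illustration of the technique:

    #include <cstdint>

    enum LocalFlags : uint64_t {       // modeled on the TSFlags layout above
      VEX    = 1ULL << 32,
      VEX_W  = 1ULL << 33,
      VEX_4V = 1ULL << 34
    };

    static inline bool hasVEX_4V(uint64_t TSFlags) {
      return (TSFlags & VEX_4V) != 0;  // works: the mask survives in 64 bits
    }
    // With the old 'unsigned TSFlags' signature the argument itself would be
    // truncated to 32 bits, so this test could never see the VEX bits.
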
@@ -433,23 +484,77 @@ namespace X86II {
 
   /// isImmPCRel - Return true if the immediate of the specified instruction's
   /// TSFlags indicates that it is pc relative.
-  static inline unsigned isImmPCRel(unsigned TSFlags) {
+  static inline unsigned isImmPCRel(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
-      default: assert(0 && "Unknown immediate size");
-      case X86II::Imm8PCRel:
-      case X86II::Imm32PCRel:
-        return true;
-      case X86II::Imm8:
-      case X86II::Imm16:
-      case X86II::Imm32:
-      case X86II::Imm64:
-        return false;
+    default: assert(0 && "Unknown immediate size");
+    case X86II::Imm8PCRel:
+    case X86II::Imm16PCRel:
+    case X86II::Imm32PCRel:
+      return true;
+    case X86II::Imm8:
+    case X86II::Imm16:
+    case X86II::Imm32:
+    case X86II::Imm64:
+      return false;
+    }
+  }
+
+  /// getMemoryOperandNo - The function returns the MCInst operand # for the
+  /// first field of the memory operand.  If the instruction doesn't have a
+  /// memory operand, this returns -1.
+  ///
+  /// Note that this ignores tied operands.  If there is a tied register which
+  /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+  /// counted as one operand.
+  ///
+  static inline int getMemoryOperandNo(uint64_t TSFlags) {
+    switch (TSFlags & X86II::FormMask) {
+    case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+    default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+    case X86II::Pseudo:
+    case X86II::RawFrm:
+    case X86II::AddRegFrm:
+    case X86II::MRMDestReg:
+    case X86II::MRMSrcReg:
+      return -1;
+    case X86II::MRMDestMem:
+      return 0;
+    case X86II::MRMSrcMem: {
+      bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+      unsigned FirstMemOp = 1;
+      if (HasVEX_4V)
+        ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+
+      // FIXME: Maybe lea should have its own form?  This is a horrible hack.
+      //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+      //    Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+      return FirstMemOp;
     }
-  }
+    case X86II::MRM0r: case X86II::MRM1r:
+    case X86II::MRM2r: case X86II::MRM3r:
+    case X86II::MRM4r: case X86II::MRM5r:
+    case X86II::MRM6r: case X86II::MRM7r:
+      return -1;
+    case X86II::MRM0m: case X86II::MRM1m:
+    case X86II::MRM2m: case X86II::MRM3m:
+    case X86II::MRM4m: case X86II::MRM5m:
+    case X86II::MRM6m: case X86II::MRM7m:
+      return 0;
+    case X86II::MRM_C1:
+    case X86II::MRM_C2:
+    case X86II::MRM_C3:
+    case X86II::MRM_C4:
+    case X86II::MRM_C8:
+    case X86II::MRM_C9:
+    case X86II::MRM_E8:
+    case X86II::MRM_F0:
+    case X86II::MRM_F8:
+    case X86II::MRM_F9:
+      return -1;
+    }
+  }
 }
 
-const int X86AddrNumOperands = 5;
-
 inline static bool isScale(const MachineOperand &MO) {
   return MO.isImm() &&
     (MO.getImm() == 1 || MO.getImm() == 2 ||
@@ -555,7 +660,7 @@ public:
   void reMaterialize(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator MI,
                      unsigned DestReg, unsigned SubIdx,
                      const MachineInstr *Orig,
-                     const TargetRegisterInfo *TRI) const;
+                     const TargetRegisterInfo &TRI) const;
 
   /// convertToThreeAddress - This method must be implemented by targets that
   /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
@@ -585,13 +690,12 @@ public:
   virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
-                                const SmallVectorImpl<MachineOperand> &Cond) const;
-  virtual bool copyRegToReg(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI,
-                            unsigned DestReg, unsigned SrcReg,
-                            const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC,
-                            DebugLoc DL) const;
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const;
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const;
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
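
The X86::Addr* enum added above fixes the layout of every x86 memory reference as five consecutive operands. A standalone sketch of how a consumer indexes them once getMemoryOperandNo (or a hard-coded start index) has located the first one; the enum is copied locally for illustration and the operand type is invented:

    #include <cstdio>

    // Local copy of the layout: [Base + Scale*Index + Disp], Segment.
    enum {
      AddrBaseReg = 0, AddrScaleAmt = 1, AddrIndexReg = 2,
      AddrDisp = 3, AddrSegmentReg = 4, AddrNumOperands = 5
    };

    struct Operand { long Value; };

    // MemOpNo is the index of the first memory sub-operand, or -1.
    static void printAddress(const Operand *Ops, int MemOpNo) {
      if (MemOpNo < 0) return;                 // no memory reference
      const Operand *A = Ops + MemOpNo;
      std::printf("base=r%ld scale=%ld index=r%ld disp=%ld seg=%ld\n",
                  A[AddrBaseReg].Value, A[AddrScaleAmt].Value,
                  A[AddrIndexReg].Value, A[AddrDisp].Value,
                  A[AddrSegmentReg].Value);
    }
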
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0d59c42..1efef5a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -72,6 +72,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
 
 def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
 def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
 
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -182,6 +184,9 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
                             [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
 
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+                        []>;
+
 //===----------------------------------------------------------------------===//
 // X86 Operand Definitions.
 
@@ -197,13 +202,9 @@ def X86MemAsmOperand : AsmOperandClass {
   let Name = "Mem";
   let SuperClasses = [];
 }
-def X86NoSegMemAsmOperand : AsmOperandClass {
-  let Name = "NoSegMem";
-  let SuperClasses = [X86MemAsmOperand];
-}
 def X86AbsMemAsmOperand : AsmOperandClass {
   let Name = "AbsMem";
-  let SuperClasses = [X86NoSegMemAsmOperand];
+  let SuperClasses = [X86MemAsmOperand];
 }
 class X86MemOperand<string printMethod> : Operand<iPTR> {
   let PrintMethod = printMethod;
@@ -226,7 +227,7 @@ def f32mem  : X86MemOperand<"printf32mem">;
 def f64mem  : X86MemOperand<"printf64mem">;
 def f80mem  : X86MemOperand<"printf80mem">;
 def f128mem : X86MemOperand<"printf128mem">;
-//def f256mem : X86MemOperand<"printf256mem">;
+def f256mem : X86MemOperand<"printf256mem">;
 
 // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
 // plain GR64, so that it doesn't potentially require a REX prefix.
@@ -245,15 +246,11 @@ def i32mem_TC : Operand<i32> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
-def lea32mem : Operand<i32> {
-  let PrintMethod = "printlea32mem";
-  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
-  let ParserMatchClass = X86NoSegMemAsmOperand;
-}
 
 let ParserMatchClass = X86AbsMemAsmOperand,
     PrintMethod = "print_pcrel_imm" in {
 def i32imm_pcrel : Operand<i32>;
+def i16imm_pcrel : Operand<i16>;
 
 def offset8 : Operand<i64>;
 def offset16 : Operand<i64>;
@@ -283,26 +280,31 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
 // 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
 // (which will be a -1ULL), and "0xFF" (-1 in 16-bits).
 
-// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x7FFFFFFF] |
+//   [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
 def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
   let Name = "ImmSExti64i32";
 }
 
-// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+//   [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
 def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
   let Name = "ImmSExti16i8";
   let SuperClasses = [ImmSExti64i32AsmOperand];
 }
 
-// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+//   [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
 def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
   let Name = "ImmSExti32i8";
 }
 
-// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] |
+//   [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
 def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
   let Name = "ImmSExti64i8";
-  let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand];
+  let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+                      ImmSExti64i32AsmOperand];
 }
 
 // A couple of more descriptive operand definitions.
@@ -321,10 +323,10 @@ def i32i8imm  : Operand<i32> {
 
 // Define X86 specific addressing mode.
 def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
-def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
+def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
                                [add, sub, mul, X86mul_imm, shl, or, frameindex],
                                []>;
-def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
+def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
                                [tglobaltlsaddr], []>;
 
 //===----------------------------------------------------------------------===//
@@ -704,6 +706,12 @@ let isCall = 1 in
                        "lcall{w}\t{*}$dst", []>, OpSize;
     def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
                        "lcall{l}\t{*}$dst", []>;
+
+    // callw for 16 bit code for the assembler.
+    let isAsmParserOnly = 1 in
+      def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+                       (outs), (ins i16imm_pcrel:$dst, variable_ops),
+                       "callw\t$dst", []>, OpSize;
   }
 
 // Constructing a stack frame.
@@ -737,18 +745,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
                    "jmp\t$dst  # TAILCALL",
                    []>;
   def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
-                   "jmp{l}\t{*}$dst  # TAILCALL",
-                   []>;
+                   "", []>;  // FIXME: Remove encoding when JIT is dead.
   let mayLoad = 1 in
   def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
                    "jmp{l}\t{*}$dst  # TAILCALL", []>;
-
-  // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL
-  // marker on instructions, while still being able to relax.
-  let isCodeGenOnly = 1 in {
-    def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
-                         "jmp\t$dst  # TAILCALL", []>;
-  }
 }
 
//===----------------------------------------------------------------------===//
@@ -815,7 +815,18 @@ def PUSHF32   : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
                 Requires<[In32BitMode]>;
 }
 
-let isTwoAddress = 1 in  // GR32 = bswap GR32
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+    mayLoad=1, neverHasSideEffects=1 in {
+def POPA32   : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+               Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+    mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32  : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+               Requires<[In32BitMode]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src = $dst" in  // GR32 = bswap GR32
 def BSWAP32r : I<0xC8, AddRegFrm,
                  (outs GR32:$dst), (ins GR32:$src),
                  "bswap{l}\t$dst",
@@ -855,11 +866,11 @@ def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
 
 let neverHasSideEffects = 1 in
 def LEA16r   : I<0x8D, MRMSrcMem,
-                 (outs GR16:$dst), (ins lea32mem:$src),
+                 (outs GR16:$dst), (ins i32mem:$src),
                  "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
 let isReMaterializable = 1 in
 def LEA32r   : I<0x8D, MRMSrcMem,
-                 (outs GR32:$dst), (ins lea32mem:$src),
+                 (outs GR32:$dst), (ins i32mem:$src),
                  "lea{l}\t{$src|$dst}, {$dst|$src}",
                  [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
 
@@ -1239,7 +1250,7 @@ def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
 //===----------------------------------------------------------------------===//
 //  Two address Instructions.
 //
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 
 // Conditional moves
 let Uses = [EFLAGS] in {
@@ -1640,7 +1651,7 @@ def CMOVNO32rm : I<0x41, MRMSrcMem,       // if !overflow, GR32 = [mem32]
 // i8 register pressure. Note that CMOV_GR8 is conservatively considered to
 // clobber EFLAGS, because if one of the operands is zero, the expansion
 // could involve an xor.
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in {
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
 def CMOV_GR8 : I<0, Pseudo,
                  (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
                  "#CMOV_GR8 PSEUDO!",
@@ -1659,86 +1670,106 @@ def CMOV_GR16 : I<0, Pseudo,
                     [(set GR16:$dst,
                       (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
 def CMOV_RFP32 : I<0, Pseudo,
-                    (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+                    (outs RFP32:$dst),
+                    (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
                     "#CMOV_RFP32 PSEUDO!",
-                    [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+                    [(set RFP32:$dst,
+                      (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
                                                   EFLAGS))]>;
 def CMOV_RFP64 : I<0, Pseudo,
-                    (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+                    (outs RFP64:$dst),
+                    (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
                     "#CMOV_RFP64 PSEUDO!",
-                    [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+                    [(set RFP64:$dst,
+                      (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
                                                   EFLAGS))]>;
 def CMOV_RFP80 : I<0, Pseudo,
-                    (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+                    (outs RFP80:$dst),
+                    (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
                     "#CMOV_RFP80 PSEUDO!",
-                    [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+                    [(set RFP80:$dst,
+                      (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
                                                   EFLAGS))]>;
 } // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS]
+} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
 } // Uses = [EFLAGS]
 
 
 // unary instructions
 let CodeSize = 2 in {
 let Defs = [EFLAGS] in {
-def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
-               [(set GR8:$dst, (ineg GR8:$src)),
+def NEG8r  : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "neg{b}\t$dst",
+               [(set GR8:$dst, (ineg GR8:$src1)),
                 (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
-               [(set GR16:$dst, (ineg GR16:$src)),
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+               "neg{w}\t$dst",
+               [(set GR16:$dst, (ineg GR16:$src1)),
                 (implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
-               [(set GR32:$dst, (ineg GR32:$src)),
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+               "neg{l}\t$dst",
+               [(set GR32:$dst, (ineg GR32:$src1)),
                 (implicit EFLAGS)]>;
-let isTwoAddress = 0 in {
-  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
+
+let Constraints = "" in {
+  def NEG8m  : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+                 "neg{b}\t$dst",
                  [(store (ineg (loadi8 addr:$dst)), addr:$dst),
                   (implicit EFLAGS)]>;
-  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
+  def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+                 "neg{w}\t$dst",
                  [(store (ineg (loadi16 addr:$dst)), addr:$dst),
                   (implicit EFLAGS)]>, OpSize;
-  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
+  def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+                 "neg{l}\t$dst",
                  [(store (ineg (loadi32 addr:$dst)), addr:$dst),
                   (implicit EFLAGS)]>;
-}
+} // Constraints = ""
 } // Defs = [EFLAGS]
 
 // Match xor -1 to not. Favors these over a move imm + xor to save code size.
 let AddedComplexity = 15 in {
-def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
-               [(set GR8:$dst, (not GR8:$src))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
-               [(set GR16:$dst, (not GR16:$src))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
-               [(set GR32:$dst, (not GR32:$src))]>;
+def NOT8r  : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "not{b}\t$dst",
+               [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+               "not{w}\t$dst",
+               [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+               "not{l}\t$dst",
+               [(set GR32:$dst, (not GR32:$src1))]>;
 }
-let isTwoAddress = 0 in {
-  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
+let Constraints = "" in {
+  def NOT8m  : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+                 "not{b}\t$dst",
                  [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
-  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
+  def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+                 "not{w}\t$dst",
                  [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
-  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
+  def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+                 "not{l}\t$dst",
                  [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-}
+} // Constraints = ""
 } // CodeSize
 
 // TODO: inc/dec is slow for P4, but fast for Pentium-M.
 let Defs = [EFLAGS] in {
 let CodeSize = 2 in
-def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>;
+def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "inc{b}\t$dst",
+               [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+
 let isConvertibleToThreeAddress = 1, CodeSize = 1 in {  // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
                "inc{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
+               [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
              OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
                "inc{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
+               [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
              Requires<[In32BitMode]>;
 }
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
   def INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
                [(store (add (loadi8 addr:$dst), 1), addr:$dst),
                 (implicit EFLAGS)]>;
@@ -1750,23 +1781,24 @@ let isTwoAddress = 0, CodeSize = 2 in {
                [(store (add (loadi32 addr:$dst), 1), addr:$dst),
                 (implicit EFLAGS)]>,
                Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
 
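
The isConvertibleToThreeAddress flag kept on these defs lets the two-address pass rewrite an inc or dec into an LEA when tying the destination to the source would force a copy; a rough illustration (not LLVM code):

    // Two-address INC ties dst to src, so a still-live source needs a copy:
    //     movl %esi, %eax
    //     incl %eax               ; also clobbers EFLAGS
    // The three-address LEA form needs no copy and leaves EFLAGS alone:
    //     leal 1(%esi), %eax
    int increment(int v) { return v + 1; }  // either encoding computes this
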
 let CodeSize = 2 in
-def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
-               [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>;
+def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+               "dec{b}\t$dst",
+               [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
 let isConvertibleToThreeAddress = 1, CodeSize = 1 in {  // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
                "dec{w}\t$dst",
-               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
+               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
              OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
                "dec{l}\t$dst",
-               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
+               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
              Requires<[In32BitMode]>;
-}
+} // CodeSize = 2
 
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
   def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
                [(store (add (loadi8 addr:$dst), -1), addr:$dst),
                 (implicit EFLAGS)]>;
@@ -1778,7 +1810,7 @@ let isTwoAddress = 0, CodeSize = 2 in {
                [(store (add (loadi32 addr:$dst), -1), addr:$dst),
                 (implicit EFLAGS)]>,
                Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
 } // Defs = [EFLAGS]
 
 // Logical operators...
@@ -1857,7 +1889,7 @@ def AND32ri8 : Ii8<0x83, MRM4r,
                    [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
                                                          i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def AND8mr   : I<0x20, MRMDestMem,
                    (outs), (ins i8mem :$dst, GR8 :$src),
                    "and{b}\t{$src, $dst|$dst, $src}",
@@ -1909,7 +1941,7 @@ let isTwoAddress = 0 in {
 
   def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
                       "and{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 
 let isCommutable = 1 in {   // X = OR Y, Z   --> X = OR Z, Y
@@ -1983,7 +2015,7 @@ def OR32ri8  : Ii8<0x83, MRM1r, (outs GR32:$dst),
                    "or{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
                                                         i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def OR8mr    : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                    "or{b}\t{$src, $dst|$dst, $src}",
                    [(store (or (load addr:$dst), GR8:$src), addr:$dst),
@@ -2025,7 +2057,7 @@ let isTwoAddress = 0 in {
                       "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
                       "or{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
 
 let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
@@ -2102,7 +2134,7 @@ def XOR32ri8 : Ii8<0x83, MRM6r,
                    [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
                                                          i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def XOR8mr   : I<0x30, MRMDestMem,
                    (outs), (ins i8mem :$dst, GR8 :$src),
                    "xor{b}\t{$src, $dst|$dst, $src}",
@@ -2153,26 +2185,27 @@ let isTwoAddress = 0 in {
                       "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
                       "xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
 } // Defs = [EFLAGS]
 
 // Shift instructions
 let Defs = [EFLAGS] in {
 let Uses = [CL] in {
-def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "shl{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (shl GR8:$src, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
                  "shl{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
                  "shl{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (shl GR32:$src, CL))]>;
+                 [(set GR32:$dst, (shl GR32:$src1, CL))]>;
 } // Uses = [CL]
 
 def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
                    "shl{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
 let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
                    "shl{w}\t{$src2, $dst|$dst, $src2}",
@@ -2193,7 +2226,7 @@ def SHL32r1  : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
 
 } // isConvertibleToThreeAddress = 1
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SHL8mCL  : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
                    "shl{b}\t{%cl, $dst|$dst, CL}",
@@ -2227,18 +2260,18 @@ let isTwoAddress = 0 in {
   def SHL32m1  : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
                    "shl{l}\t$dst",
                  [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 let Uses = [CL] in {
-def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "shr{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (srl GR8:$src, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
                  "shr{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
                  "shr{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (srl GR32:$src, CL))]>;
+                 [(set GR32:$dst, (srl GR32:$src1, CL))]>;
 }
 
 def SHR8ri   : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2262,7 +2295,7 @@ def SHR32r1  : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
                  "shr{l}\t$dst",
                  [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
                    "shr{b}\t{%cl, $dst|$dst, CL}",
@@ -2296,18 +2329,18 @@ let isTwoAddress = 0 in {
   def SHR32m1  : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
                    "shr{l}\t$dst",
                  [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 let Uses = [CL] in {
-def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
+def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "sar{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (sra GR8:$src, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
                  "sar{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
                  "sar{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (sra GR32:$src, CL))]>;
+                 [(set GR32:$dst, (sra GR32:$src1, CL))]>;
 }
 
 def SAR8ri   : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2332,7 +2365,7 @@ def SAR32r1  : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
                  "sar{l}\t$dst",
                  [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
                    "sar{b}\t{%cl, $dst|$dst, CL}",
@@ -2366,65 +2399,65 @@ let isTwoAddress = 0 in {
   def SAR32m1  : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
                    "sar{l}\t$dst",
                  [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 // Rotate instructions
 
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
                "rcl{b}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
                 "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
                  "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
 
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
                 "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
 let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
                  "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
 }
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
                   "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
 
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
                 "rcl{l}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
                  "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
                   "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
 
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
               "rcr{b}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
                 "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
                  "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
 
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
                 "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
 let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
                  "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
 }
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
                   "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
 
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
                 "rcr{l}\t{1, $dst|$dst, 1}", []>;
 let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
                  "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
 }
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
                   "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
                  "rcl{b}\t{1, $dst|$dst, 1}", []>;
   def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -2464,19 +2497,19 @@ def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
 def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
                  "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
 }
-}
+} // Constraints = ""
 
 // FIXME: provide shorter instructions when imm8 == 1
 let Uses = [CL] in {
-def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "rol{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotl GR8:$src, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
                  "rol{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
                  "rol{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotl GR32:$src, CL))]>;
+                 [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
 }
 
 def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2501,7 +2534,7 @@ def ROL32r1  : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
                  "rol{l}\t$dst",
                  [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
                    "rol{b}\t{%cl, $dst|$dst, CL}",
@@ -2535,18 +2568,18 @@ let isTwoAddress = 0 in {
   def ROL32m1  : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
                    "rol{l}\t$dst",
                   [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
 
 let Uses = [CL] in {
-def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "ror{b}\t{%cl, $dst|$dst, CL}",
-                 [(set GR8:$dst, (rotr GR8:$src, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
+                 [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
                  "ror{w}\t{%cl, $dst|$dst, CL}",
-                 [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
+                 [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
                  "ror{l}\t{%cl, $dst|$dst, CL}",
-                 [(set GR32:$dst, (rotr GR32:$src, CL))]>;
+                 [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
 }
 
 def ROR8ri   : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2571,7 +2604,7 @@ def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
                  "ror{l}\t$dst",
                  [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
                    "ror{b}\t{%cl, $dst|$dst, CL}",
@@ -2605,8 +2638,7 @@ let isTwoAddress = 0 in {
   def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
                    "ror{l}\t$dst",
                   [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
-
+} // Constraints = ""
 
 // Double shift instructions (generalizations of rotate)
@@ -2662,7 +2694,7 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
                      TB, OpSize;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   let Uses = [CL] in {
   def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
                      "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -2708,7 +2740,7 @@ let isTwoAddress = 0 in {
                        [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
                                         (i8 imm:$src3)), addr:$dst)]>,
                        TB, OpSize;
-}
+} // Constraints = ""
 } // Defs = [EFLAGS]
 
@@ -2794,7 +2826,7 @@ def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
                                            (X86add_flag GR32:$src1,
                                                         i32immSExt8:$src2))]>;
 }
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   // Memory-Register Addition
   def ADD8mr   : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                    "add{b}\t{$src2, $dst|$dst, $src2}",
@@ -2838,7 +2870,7 @@ let isTwoAddress = 0 in {
                       "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
                       "add{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 
 let Uses = [EFLAGS] in {
 let isCommutable = 1 in {  // X = ADC Y, Z --> X = ADC Z, Y
@@ -2900,7 +2932,7 @@ def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
                    "adc{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def ADC8mr   : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                    "adc{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -2935,7 +2967,7 @@ let isTwoAddress = 0 in {
                       "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
                       "adc{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 } // Uses = [EFLAGS]
 
 // Register-Register Subtraction
@@ -3007,7 +3039,7 @@ def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
                    [(set GR32:$dst, EFLAGS,
                          (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   // Memory-Register Subtraction
   def SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
                    "sub{b}\t{$src2, $dst|$dst, $src2}",
@@ -3052,7 +3084,7 @@ let isTwoAddress = 0 in {
                       "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
                       "sub{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 
 let Uses = [EFLAGS] in {
 def SBB8rr     : I<0x18, MRMDestReg, (outs GR8:$dst),
@@ -3068,7 +3100,7 @@ def SBB32rr    : I<0x19, MRMDestReg, (outs GR32:$dst),
                    "sbb{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
 
-let isTwoAddress = 0 in {
+let Constraints = "" in {
   def SBB8mr   : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
                    [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -3103,7 +3135,7 @@ let isTwoAddress = 0 in {
                       "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
   def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
                       "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
 
 let isCodeGenOnly = 1 in {
 def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst),
                    (ins GR8:$src1, GR8:$src2),
@@ -3811,6 +3843,7 @@ def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
 // Thread Local Storage Instructions
 //
 
+// ELF TLS Support
 // All calls clobber the non-callee saved registers. ESP is marked as
 // a use to prevent stack-pointer assignments that appear immediately
 // before calls from potentially appearing dead.
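
TLS_addr32 below prints the general-dynamic ELF TLS sequence, so in 32-bit PIC code every access to a __thread variable becomes a runtime call. A small illustration; the lowering shown in the comment is approximate:

    // Reading 'counter' in position-independent 32-bit ELF code lowers to
    // roughly:
    //     leal  counter@TLSGD(,%ebx,1), %eax
    //     call  ___tls_get_addr@PLT     ; &counter returned in %eax
    // which is the sequence the TLS_addr32 pseudo expands to.
    __thread int counter;

    int bump() { return ++counter; }
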
@@ -3819,12 +3852,24 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
     Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                   "leal\t$sym, %eax; "
                   "call\t___tls_get_addr@PLT",
                   [(X86tlsaddr tls32addr:$sym)]>,
                   Requires<[In32BitMode]>;
 
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax.  %ecx is trashed during the function
+// call.  All other registers are preserved.
+let Defs = [EAX, ECX],
+    Uses = [ESP],
+    usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+                "# TLSCall_32",
+                [(X86TLSCall addr:$sym)]>,
+                Requires<[In32BitMode]>;
+
 let AddedComplexity = 5, isCodeGenOnly = 1 in
 def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                    "movl\t%gs:$src, $dst",
@@ -4783,14 +4828,14 @@ def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
 // Patterns for nodes that do not produce flags, for instructions that do.
 
 // Increment reg.
-def : Pat<(add GR8:$src ,  1), (INC8r  GR8:$src)>;
-def : Pat<(add GR16:$src,  1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src,  1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 ,  1), (INC8r  GR8:$src1)>;
+def : Pat<(add GR16:$src1,  1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1,  1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
 
 // Decrement reg.
-def : Pat<(add GR8:$src , -1), (DEC8r  GR8:$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , -1), (DEC8r  GR8:$src1)>;
+def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
 
 // or reg/reg.
 def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
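
TLSCall_32 models Darwin's thread-local mechanism: code materializes the address of a per-variable descriptor and calls through its first word, which returns the variable's address in %eax and trashes only %ecx, matching the Defs list above. A hedged sketch of the shape of that convention; the exact descriptor layout belongs to dyld:

    // Field layout shown for illustration; dyld owns the real definition.
    struct TLVDescriptor {
      void *(*thunk)(TLVDescriptor *self);  // returns the variable's address
      unsigned long key;                    // per-image pthread key
      unsigned long offset;                 // offset within the TLV block
    };

    // An access to a __thread variable compiles to roughly:
    //   movl  _var$tlv$descriptor, %eax    ; %eax = &descriptor
    //   call  *(%eax)                      ; &var in %eax, %ecx trashed
    static void *addressOf(TLVDescriptor *D) { return D->thunk(D); }
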
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 0952fc8..6cf7ac8 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -513,30 +513,20 @@ def : Pat<(store (v4i16 VR64:$src), addr:$dst),
           (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
 def : Pat<(store (v2i32 VR64:$src), addr:$dst),
           (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2f32 VR64:$src), addr:$dst),
-          (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
 def : Pat<(store (v1i64 VR64:$src), addr:$dst),
           (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
 
 // Bit convert.
 def : Pat<(v8i8  (bitconvert (v1i64 VR64:$src))), (v8i8  VR64:$src)>;
 def : Pat<(v8i8  (bitconvert (v2i32 VR64:$src))), (v8i8  VR64:$src)>;
-def : Pat<(v8i8  (bitconvert (v2f32 VR64:$src))), (v8i8  VR64:$src)>;
 def : Pat<(v8i8  (bitconvert (v4i16 VR64:$src))), (v8i8  VR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v8i8  VR64:$src))), (v4i16 VR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v8i8  VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8  VR64:$src))), (v2f32 VR64:$src)>;
 def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>;
 def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
 def : Pat<(v1i64 (bitconvert (v8i8  VR64:$src))), (v1i64 VR64:$src)>;
 
@@ -545,8 +535,6 @@ def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
           (MMX_MOVD64to64rr GR64:$src)>;
 def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
           (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2f32 (bitconvert (i64 GR64:$src))),
-          (MMX_MOVD64to64rr GR64:$src)>;
 def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
           (MMX_MOVD64to64rr GR64:$src)>;
 def : Pat<(v8i8  (bitconvert (i64 GR64:$src))),
@@ -555,8 +543,6 @@ def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
           (MMX_MOVD64from64rr VR64:$src)>;
 def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
           (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2f32 VR64:$src))),
-          (MMX_MOVD64from64rr VR64:$src)>;
 def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
           (MMX_MOVD64from64rr VR64:$src)>;
 def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 5580ba7..ab0005b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -15,322 +15,6 @@
 
 //===----------------------------------------------------------------------===//
-// SSE specific DAG Nodes.
-//===----------------------------------------------------------------------===//
-
-def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
-                                            SDTCisFP<0>, SDTCisInt<2> ]>;
-def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
-                                       SDTCisFP<1>, SDTCisVT<3, i8>]>;
-
-def X86fmin    : SDNode<"X86ISD::FMIN",      SDTFPBinOp>;
-def X86fmax    : SDNode<"X86ISD::FMAX",      SDTFPBinOp>;
-def X86fand    : SDNode<"X86ISD::FAND",      SDTFPBinOp,
-                        [SDNPCommutative, SDNPAssociative]>;
-def X86for     : SDNode<"X86ISD::FOR",       SDTFPBinOp,
-                        [SDNPCommutative, SDNPAssociative]>;
-def X86fxor    : SDNode<"X86ISD::FXOR",      SDTFPBinOp,
-                        [SDNPCommutative, SDNPAssociative]>;
-def X86frsqrt  : SDNode<"X86ISD::FRSQRT",    SDTFPUnaryOp>;
-def X86frcp    : SDNode<"X86ISD::FRCP",      SDTFPUnaryOp>;
-def X86fsrl    : SDNode<"X86ISD::FSRL",      SDTX86FPShiftOp>;
-def X86comi    : SDNode<"X86ISD::COMI",      SDTX86CmpTest>;
-def X86ucomi   : SDNode<"X86ISD::UCOMI",     SDTX86CmpTest>;
-def X86pshufb  : SDNode<"X86ISD::PSHUFB",
-                 SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
-                                      SDTCisSameAs<0,2>]>>;
-def X86pextrb  : SDNode<"X86ISD::PEXTRB",
-                 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pextrw  : SDNode<"X86ISD::PEXTRW",
-                 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pinsrb  : SDNode<"X86ISD::PINSRB",
-                 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
-                                      SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86pinsrw  : SDNode<"X86ISD::PINSRW",
-                 SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
-                                      SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86insrtps : SDNode<"X86ISD::INSERTPS",
-                 SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
-                                      SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
-def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
-                 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
-def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                        [SDNPHasChain, SDNPMayLoad]>;
-def X86vshl    : SDNode<"X86ISD::VSHL",      SDTIntShiftOp>;
-def X86vshr    : SDNode<"X86ISD::VSRL",      SDTIntShiftOp>;
-def X86cmpps   : SDNode<"X86ISD::CMPPS",     SDTX86VFCMP>;
-def X86cmppd   : SDNode<"X86ISD::CMPPD",     SDTX86VFCMP>;
-def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
-def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
-def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
-def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
-
-def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
-                                          SDTCisVT<1, v4f32>,
-                                          SDTCisVT<2, v4f32>]>;
-def X86ptest   : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
-
-//===----------------------------------------------------------------------===//
-// SSE Complex Patterns
-//===----------------------------------------------------------------------===//
-
-// These are 'extloads' from a scalar to the low element of a vector, zeroing
-// the top elements.  These are used for the SSE 'ss' and 'sd' instruction
-// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
-
-def ssmem : Operand<v4f32> {
-  let PrintMethod = "printf32mem";
-  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-def sdmem : Operand<v2f64> {
-  let PrintMethod = "printf64mem";
-  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE pattern fragments
-//===----------------------------------------------------------------------===//
-
-def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
-def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv4i32    : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
-def loadv2i64    : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
-
-// Like 'store', but always requires vector alignment.
-def alignedstore : PatFrag<(ops node:$val, node:$ptr),
-                           (store node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-// Like 'load', but always requires vector alignment.
-def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
-                               (f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
-                               (f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr),
-                               (v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr),
-                               (v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
-                               (v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr),
-                               (v2i64 (alignedload node:$ptr))>;
-
-// Like 'load', but uses special alignment checks suitable for use in
-// memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others.  If the subtarget
-// allows unaligned accesses, match any load, though this may require
-// setting a feature bit in the processor (on startup, for example).
-// Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return Subtarget->hasVectorUAMem()
-    || cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def memopfsf32 : PatFrag<(ops node:$ptr), (f32   (memop node:$ptr))>;
-def memopfsf64 : PatFrag<(ops node:$ptr), (f64   (memop node:$ptr))>;
-def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
-def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
-def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
-def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-
-// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
-// 16-byte boundary.
-// FIXME: 8 byte alignment for mmx reads is not required
-def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
-def memopv8i8  : PatFrag<(ops node:$ptr), (v8i8  (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
-
-// MOVNT Support
-// Like 'store', but requires the non-temporal bit to be set
-def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
-                           (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
-    return ST->isNonTemporal();
-  return false;
-}]>;
-
-def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
-                                   (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
-    return ST->isNonTemporal() && !ST->isTruncatingStore() &&
-           ST->getAddressingMode() == ISD::UNINDEXED &&
-           ST->getAlignment() >= 16;
-  return false;
-}]>;
-
-def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
-                                   (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
-    return ST->isNonTemporal() &&
-           ST->getAlignment() < 16;
-  return false;
-}]>;
-
-def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
-def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
-def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
-def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
-def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
-def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
-
-def vzmovl_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzmovl
-                             (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
-def vzmovl_v4i32 : PatFrag<(ops node:$src),
-                           (bitconvert (v4i32 (X86vzmovl
-                             (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
-
-def vzload_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzload node:$src)))>;
-
-
-def fp32imm0 : PatLeaf<(f32 fpimm), [{
-  return N->isExactlyValue(+0.0);
-}]>;
-
-// BYTE_imm - Transform bit immediates into byte immediates.
-def BYTE_imm  : SDNodeXForm<imm, [{
-  // Transformation function: imm >> 3
-  return getI32Imm(N->getZExtValue() >> 3);
-}]>;
-
-// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
-// SHUFP* etc. imm.
-def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
-// PSHUFHW imm.
-def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
-// PSHUFLW imm.
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
-}]>;
-
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePALIGNRImmediate(N)); -}]>; - -def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return SVOp->isSplat() && SVOp->getSplatIndex() == 0; -}]>; - -def movddup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movhlps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movlhps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movlp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movshdup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movsldup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def pshufd : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_shuf_imm>; - -def shufp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_shuf_imm>; - -def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_pshufhw_imm>; - -def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_pshuflw_imm>; - -def palign : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_palign_imm>; - -//===----------------------------------------------------------------------===// // SSE scalar FP Instructions //===----------------------------------------------------------------------===// @@ -368,857 +52,642 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { } 
//===----------------------------------------------------------------------===// -// SSE1 Instructions +// SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// -// Move Instructions. Register-to-register movss is not used for FR32 -// register copies because it's a partial register update; FsMOVAPSrr is -// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG -// because INSERT_SUBREG requires that the insert be implementable in terms of -// a copy, and just mentioned, we don't use movss for copies. -let Constraints = "$src1 = $dst" in -def MOVSSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", - [(set (v4f32 VR128:$dst), - (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>; +/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class +multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, X86MemOperand x86memop, + bit Is2Addr = 1> { + let isCommutable = 1 in { + def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>; + } + def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>; +} + +/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class +multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, + string asm, string SSEVer, string FPSizeStr, + Operand memopr, ComplexPattern mem_cpat, + bit Is2Addr = 1> { + def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, RC:$src2))]>; + def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, mem_cpat:$src2))]>; +} + +/// sse12_fp_packed - SSE 1 & 2 packed instructions class +multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag, + Domain d, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>; + let mayLoad = 1 in + def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>; +} + +/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class +multiclass 
sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+                                 string OpcodeStr, X86MemOperand x86memop,
+                                 list<dag> pat_rr, list<dag> pat_rm,
+                                 bit Is2Addr = 1> {
+  let isCommutable = 1 in
+  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+     !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+     pat_rr, d>;
+  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+     !if(Is2Addr,
+         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+     pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                           string asm, string SSEVer, string FPSizeStr,
+                           X86MemOperand x86memop, PatFrag mem_frag,
+                           Domain d, bit Is2Addr = 1> {
+  def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+       !if(Is2Addr,
+           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+                                  !strconcat(SSEVer, !strconcat("_",
+                                  !strconcat(OpcodeStr, FPSizeStr))))
+             RC:$src1, RC:$src2))], d>;
+  def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
+       !if(Is2Addr,
+           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+       [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+                                  !strconcat(SSEVer, !strconcat("_",
+                                  !strconcat(OpcodeStr, FPSizeStr))))
+             RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Instructions
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+  SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+      [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loading from memory automatically zeroing upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+                    PatFrag mem_pat, string OpcodeStr> :
+  SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+              [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and, as just mentioned, we don't use movss/movsd for
+// copies.
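To make the Is2Addr selection in these classes concrete, here is a rough
expansion sketch. The instantiation is hypothetical (the name FADD, opcode
0x58, fadd, FR32 and f32mem are chosen only for illustration; the patch's
real instantiations appear further down). A defm such as

  defm FADD : sse12_fp_scalar<0x58, "addss", fadd, FR32, f32mem>;

would, with the default Is2Addr = 1, produce roughly these two records (the
rr form additionally carries isCommutable = 1 from the surrounding let):

  def FADDrr : SI<0x58, MRMSrcReg, (outs FR32:$dst),
                  (ins FR32:$src1, FR32:$src2),
                  "addss\t{$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
  def FADDrm : SI<0x58, MRMSrcMem, (outs FR32:$dst),
                  (ins FR32:$src1, f32mem:$src2),
                  "addss\t{$src2, $dst|$dst, $src2}",
                  [(set FR32:$dst, (fadd FR32:$src1, (load addr:$src2)))]>;

Passing Is2Addr = 0 instead selects the three-operand
"\t{$src2, $src1, $dst|$dst, $src1, $src2}" asm template used by the
VEX-encoded variants.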
+let isAsmParserOnly = 1 in { + def VMOVSSrr : sse12_move_rr<FR32, v4f32, + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V; + def VMOVSDrr : sse12_move_rr<FR64, v2f64, + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V; + + let canFoldAsLoad = 1, isReMaterializable = 1 in { + def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX; + + let AddedComplexity = 20 in + def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX; + } +} + +let Constraints = "$src1 = $dst" in { + def MOVSSrr : sse12_move_rr<FR32, v4f32, + "movss\t{$src2, $dst|$dst, $src2}">, XS; + def MOVSDrr : sse12_move_rr<FR64, v2f64, + "movsd\t{$src2, $dst|$dst, $src2}">, XD; +} + +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS; + let AddedComplexity = 20 in + def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; +} + +let AddedComplexity = 15 in { // Extract the low 32-bit value from one vector and insert it into another. -let AddedComplexity = 15 in def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4f32 VR128:$src1), (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +// Extract the low 64-bit value from one vector and insert it into another. +def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; +} // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; -// Loading from memory automatically zeroing upper bits. -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (loadf32 addr:$src))]>; - +let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. -let AddedComplexity = 20 in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; +// MOVSDrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. +def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; } // Store scalar value to memory. 
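As a cross-check on the refactoring, hand-resolving the new MOVSSrm
instantiation through the sse12_move_rm class above gives (a sketch; the
prefix and predicate details live in the SI base class plus the trailing XS):

  def MOVSSrm : SI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                   "movss\t{$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (loadf32 addr:$src))]>, XS;

which, modulo the SSI vs. SI + XS spelling of the prefix, is the hand-written
MOVSSrm definition this patch deletes, so the multiclass rewrite is
behavior-preserving for the scalar loads.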
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; +def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>; + +let isAsmParserOnly = 1 in { +def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), + "movss\t{$src, $dst|$dst, $src}", + [(store FR32:$src, addr:$dst)]>, XS, VEX_4V; +def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>, XD, VEX_4V; +} // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; -// Conversion instructions -def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint FR32:$src))]>; -def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>; -def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), - "cvtsi2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp GR32:$src))]>; -def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), - "cvtsi2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>; - -// Match intrinsics which expect XMM operand(s). -def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; -def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; - -def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvtss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>; -def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvtss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_cvtss2si - (load addr:$src)))]>; - -// Match intrinsics which expect MM and XMM operand(s). 
-def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; -def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi - (load addr:$src)))]>; -def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>; -def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi - (load addr:$src)))]>; -let Constraints = "$src1 = $dst" in { - def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR64:$src2), - "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, - VR64:$src2))]>; - def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), - "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, - (load addr:$src2)))]>; -} - -// Aliases for intrinsics -def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse_cvttss2si VR128:$src))]>; -def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse_cvttss2si(load addr:$src)))]>; - -let Constraints = "$src1 = $dst" in { - def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR32:$src2), - "cvtsi2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, - GR32:$src2))]>; - def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2), - "cvtsi2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, - (loadi32 addr:$src2)))]>; -} - -// Comparison instructions -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in - def CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; - - // Accept explicit immediate argument form instead of comparison code. 
-let isAsmParserOnly = 1 in { - def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -let mayLoad = 1 in - def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -} -} - -let Defs = [EFLAGS] in { -def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>; -def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; - -def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; -def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; - -} // Defs = [EFLAGS] - -// Aliases to match intrinsics which expect XMM operand(s). -let Constraints = "$src1 = $dst" in { - def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss - VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, - (load addr:$src), imm:$cc))]>; -} - -let Defs = [EFLAGS] in { -def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - (load addr:$src2)))]>; - -def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - (load addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases of packed SSE1 instructions for scalar use. These all have names -// that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in - // FIXME: Set encoding to pseudo! -def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; - -// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are -// disregarded. +// Move Aligned/Unaligned floating point values +multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, + X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d, + bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in -def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; - -// Alias instruction to load FR32 from f128mem using movaps. Upper bits are -// disregarded. 
-let canFoldAsLoad = 1, isReMaterializable = 1 in -def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), - "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; - -// Alias bitwise logical operations using SSE logical ops on packed FP values. -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { - def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>; - def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>; - def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>; -} - -def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, - (memopfsf32 addr:$src2)))]>; -def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86for FR32:$src1, - (memopfsf32 addr:$src2)))]>; -def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, - (memopfsf32 addr:$src2)))]>; - -let neverHasSideEffects = 1 in { -def FsANDNPSrr : PSI<0x55, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", []>; -let mayLoad = 1 in -def FsANDNPSrm : PSI<0x55, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", []>; -} -} - -/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements unmodified (therefore these cannot be commuted). -/// -/// These three forms can each be reg+reg or reg+mem, so there are a total of -/// six "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F32Int, - bit Commutable = 0> { - // Scalar operation, reg+reg. - def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f32mem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. 
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. - def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>; - - // Intrinsic operation, reg+mem. - def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, - sse_load_f32:$src2))]>; + def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>; +let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in + def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (ld_frag addr:$src))], d>; } -} - -// Arithmetic instructions -defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>; -defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>; -defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>; -defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>; -/// sse1_fp_binop_rm - Other SSE1 binops -/// -/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of -/// instructions for a full-vector intrinsic form. Operations that map -/// onto C operators don't use this form since they just use the plain -/// vector form instead of having a separate vector intrinsic form. -/// -/// This provides a total of eight "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F32Int, - Intrinsic V4F32Int, - bit Commutable = 0> { - - // Scalar operation, reg+reg. - def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f32mem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. - def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } - - // Intrinsic operation, reg+mem. 
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, - sse_load_f32:$src2))]>; - - // Vector intrinsic operation, reg+reg. - def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } - - // Vector intrinsic operation, reg+mem. - def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>; -} +let isAsmParserOnly = 1 in { +defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, + "movaps", SSEPackedSingle>, VEX; +defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, + "movapd", SSEPackedDouble>, OpSize, VEX; +defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, + "movups", SSEPackedSingle>, VEX; +defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, + "movupd", SSEPackedDouble, 0>, OpSize, VEX; + +defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, + "movaps", SSEPackedSingle>, VEX; +defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, + "movapd", SSEPackedDouble>, OpSize, VEX; +defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, + "movups", SSEPackedSingle>, VEX; +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, + "movupd", SSEPackedDouble, 0>, OpSize, VEX; } +defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, + "movaps", SSEPackedSingle>, TB; +defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, + "movapd", SSEPackedDouble>, TB, OpSize; +defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, + "movups", SSEPackedSingle>, TB; +defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, + "movupd", SSEPackedDouble, 0>, TB, OpSize; -defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax, - int_x86_sse_max_ss, int_x86_sse_max_ps>; -defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, - int_x86_sse_min_ss, int_x86_sse_min_ps>; - -//===----------------------------------------------------------------------===// -// SSE packed FP Instructions - -// Move Instructions -let neverHasSideEffects = 1 in -def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), +let isAsmParserOnly = 1 in { +def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>; - + [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX; +def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX; +def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(store (v4f32 VR128:$src), addr:$dst)]>, VEX; +def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; +def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, 
VR256:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movups\t{$src, $dst|$dst, $src}", + [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; +} def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; - -let neverHasSideEffects = 1 in -def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movups\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (loadv4f32 addr:$src))]>; +def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v4f32 VR128:$src), addr:$dst)]>; +def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v2f64 VR128:$src), addr:$dst)]>; -// Intrinsic forms of MOVUPS load and store +// Intrinsic forms of MOVUPS/D load and store +let isAsmParserOnly = 1 in { + let canFoldAsLoad = 1, isReMaterializable = 1 in + def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "movups\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX; + def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX; + def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; + def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; +} let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; +def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; + def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>; +def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; -let Constraints = "$src1 = $dst" in { - let AddedComplexity = 20 in { - def MOVLPSrm : PSI<0x12, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "movlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (movlp VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 
addr:$src2))))))]>; - def MOVHPSrm : PSI<0x16, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "movhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (movlhps VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; - } // AddedComplexity -} // Constraints = "$src1 = $dst" - +// Move Low/High packed floating point values +multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, + PatFrag mov_frag, string base_opc, + string asm_opr> { + def PSrm : PI<opc, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), + !strconcat(!strconcat(base_opc,"s"), asm_opr), + [(set RC:$dst, + (mov_frag RC:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], + SSEPackedSingle>, TB; + + def PDrm : PI<opc, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f64mem:$src2), + !strconcat(!strconcat(base_opc,"d"), asm_opr), + [(set RC:$dst, (v2f64 (mov_frag RC:$src1, + (scalar_to_vector (loadf64 addr:$src2)))))], + SSEPackedDouble>, TB, OpSize; +} -def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), - (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; +let isAsmParserOnly = 1, AddedComplexity = 20 in { + defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; + defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; +} +let Constraints = "$src1 = $dst", AddedComplexity = 20 in { + defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", + "\t{$src2, $dst|$dst, $src2}">; + defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $dst|$dst, $src2}">; +} +let isAsmParserOnly = 1 in { +def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movlps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), + (iPTR 0))), addr:$dst)]>, VEX; +def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128:$src), + (iPTR 0))), addr:$dst)]>, VEX; +} def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), (iPTR 0))), addr:$dst)]>; +def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128:$src), + (iPTR 0))), addr:$dst)]>; // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. 
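One naming detail of sse12_mov_hilo_packed is worth spelling out: defm
concatenates its own prefix with the def names inside the multiclass, so the
non-VEX instantiations above regenerate the familiar instruction names.
Hand-substituting the MOVL case (a sketch, inheriting the Constraints and
AddedComplexity from the surrounding let):

  // defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
  //                                   "\t{$src2, $dst|$dst, $src2}">;
  // yields MOVLPSrm and MOVLPDrm; the PS record is roughly:
  def MOVLPSrm : PI<0x12, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                    "movlps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (movlp VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector
                                            (loadf64 addr:$src2))))))],
                    SSEPackedSingle>, TB;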
+let isAsmParserOnly = 1 in { +def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (unpckh (bc_v2f64 (v4f32 VR128:$src)), + (undef)), (iPTR 0))), addr:$dst)]>, + VEX; +def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>, + VEX; +} def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (unpckh (bc_v2f64 (v4f32 VR128:$src)), (undef)), (iPTR 0))), addr:$dst)]>; +def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>; -let Constraints = "$src1 = $dst" in { -let AddedComplexity = 20 in { -def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - "movlhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; - -def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - "movhlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; -} // AddedComplexity -} // Constraints = "$src1 = $dst" +let isAsmParserOnly = 1, AddedComplexity = 20 in { + def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>, + VEX_4V; + def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>, + VEX_4V; +} +let Constraints = "$src1 = $dst", AddedComplexity = 20 in { + def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movlhps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; + def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movhlps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; +} +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; let AddedComplexity = 20 in { -def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; -def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; + def : Pat<(v4f32 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; + def : Pat<(v2i64 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; } +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Conversion Instructions +//===----------------------------------------------------------------------===// +multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (OpNode SrcRC:$src))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set 
DstRC:$dst, (OpNode (ld_frag addr:$src)))]>; +} -// Arithmetic - -/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a -/// scalar) and leaves the top elements undefined. -/// -/// And, we have a special variant form for a full-vector intrinsic form. -/// -/// These four forms can each have a reg or a mem operand, so there are a -/// total of eight "instructions". -/// -multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F32Int, - Intrinsic V4F32Int, - bit Commutable = 0> { - // Scalar operation, reg. - def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]> { - let isCommutable = Commutable; - } - - // Scalar operation, mem. - def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS, - Requires<[HasSSE1, OptForSize]>; +multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (OpNode SrcRC:$src))], d>; + def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>; +} - // Vector operation, reg. - def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> { - let isCommutable = Commutable; - } +multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), + asm, []>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src), asm, []>; +} - // Vector operation, mem. - def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>; +let isAsmParserOnly = 1 in { +defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; +defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, + "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS, + VEX_4V; +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, + "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD, + VEX_4V; +} - // Intrinsic operation, reg. 
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))]> { - let isCommutable = Commutable; - } +defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS; +defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD; +defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, + "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS; +defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, + "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD; + +// Conversion Instructions Intrinsics - Match intrinsics which expect MM +// and/or XMM operand(s). +multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))], d>; + def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; +} - // Intrinsic operation, mem. - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; +multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; +} - // Vector intrinsic operation, reg - def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V4F32Int VR128:$src))]> { - let isCommutable = Commutable; - } +multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm, Domain d> { + def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; + def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src2), asm, + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; +} - // Vector intrinsic operation, mem - def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>; +multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src2), asm, + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } -// Square root. 
-defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt",  fsqrt,
-                             int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+let isAsmParserOnly = 1 in {
+  defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+                        f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
+                        VEX;
+  defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                        f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
+                        VEX;
+}
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+                      f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                      f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;

-// Reciprocal approximations. Note that these typically require refinement
-// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
-                             int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
-defm RCP   : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
-                             int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;

-// Logical
 let Constraints = "$src1 = $dst" in {
-  let isCommutable = 1 in {
-    def ANDPSrr : PSI<0x54, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "andps\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst, (v2i64
-                                         (and VR128:$src1, VR128:$src2)))]>;
-    def ORPSrr  : PSI<0x56, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "orps\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst, (v2i64
-                                         (or VR128:$src1, VR128:$src2)))]>;
-    def XORPSrr : PSI<0x57, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "xorps\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst, (v2i64
-                                         (xor VR128:$src1, VR128:$src2)))]>;
-  }
-
-  def ANDPSrm : PSI<0x54, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "andps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)),
-                                       (memopv2i64 addr:$src2)))]>;
-  def ORPSrm  : PSI<0x56, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "orps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)),
-                                       (memopv2i64 addr:$src2)))]>;
-  def XORPSrm : PSI<0x57, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "xorps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)),
-                                       (memopv2i64 addr:$src2)))]>;
-  def ANDNPSrr : PSI<0x55, MRMSrcReg,
-                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     "andnps\t{$src2, $dst|$dst, $src2}",
-                     [(set VR128:$dst,
-                       (v2i64 (and (xor VR128:$src1,
-                                    (bc_v2i64 (v4i32 immAllOnesV))),
-                               VR128:$src2)))]>;
-  def ANDNPSrm : PSI<0x55, MRMSrcMem,
-                     (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
-                     "andnps\t{$src2, $dst|$dst, $src2}",
-                     [(set VR128:$dst,
-                       (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
-                                    (bc_v2i64 (v4i32 immAllOnesV))),
-                               (memopv2i64 addr:$src2))))]>;
+  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
+                        "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}">, XD;
 }
+// Instructions below don't have an AVX form.
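The same defm naming mechanics apply to the conversion intrinsics: each
sse12_cvt_sint instantiation above produces an rr and an rm record.
Hand-resolving Int_CVTSS2SI gives, as a sketch:

  def Int_CVTSS2SIrr : SI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                          "cvtss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvtss2si VR128:$src))]>, XS;
  def Int_CVTSS2SIrm : SI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
                          "cvtss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvtss2si (load addr:$src)))]>, XS;

which is essentially the hand-written Int_CVTSS2SIrr/Int_CVTSS2SIrm pair this
patch deletes earlier, modulo the SSI vs. SI + XS spelling of the prefix.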
+defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, + f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, + f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, + f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, + f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, + i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; let Constraints = "$src1 = $dst" in { - def CMPPSrri : PSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, - VR128:$src, imm:$cc))]>; - def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, - (memop addr:$src), imm:$cc))]>; - - // Accept explicit immediate argument form instead of comparison code. -let isAsmParserOnly = 1 in { - def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), - "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>; - def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), - "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>; + defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, + int_x86_sse_cvtpi2ps, + i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; } -} -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; - -// Shuffle and unpack instructions -let Constraints = "$src1 = $dst" in { - let isConvertibleToThreeAddress = 1 in // Convert to pshufd - def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2, i8imm:$src3), - "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i8imm:$src3), - "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v4f32 (shufp:$src3 - VR128:$src1, (memopv4f32 addr:$src2))))]>; - - let AddedComplexity = 10 in { - def UNPCKHPSrr : PSI<0x15, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPSrm : PSI<0x15, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, - (memopv4f32 addr:$src2))))]>; - - def UNPCKLPSrr : PSI<0x14, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpcklps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPSrm : PSI<0x14, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), 
- "unpcklps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; - } // AddedComplexity -} // Constraints = "$src1 = $dst" - -// Mask creation -def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>; -def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>; - -// Prefetch intrinsic. -def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), - "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; -def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), - "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; -def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), - "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; -def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), - "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; -// Non-temporal stores -def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +/// SSE 1 Only -let AddedComplexity = 400 in { // Prefer non-temporal versions -def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; - -def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; - -def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "movnti\t{$src, $dst|$dst, $src}", - [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, - TB, Requires<[HasSSE2]>; - -def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "movnti\t{$src, $dst|$dst, $src}", - [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, - TB, Requires<[HasSSE2]>; +// Aliases for intrinsics +let isAsmParserOnly = 1, Pattern = []<dag> in { +defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32, + int_x86_sse_cvttss2si, f32mem, load, + "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS; +defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32, + int_x86_sse2_cvttsd2si, f128mem, load, + "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD; } - -// Load, store, and memory fence -def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, - TB, Requires<[HasSSE1]>; - -// MXCSR register -def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), - "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>; -def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), - "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>; - -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-zeros value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! 
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+                       f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+                       XS;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+                       f128mem, load, "cvttsd2si\t{$src, $dst|$dst, $src}">,
+                       XD;
+
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+                             "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+                             "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                             SSEPackedSingle>, TB, VEX;
}
-
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
-
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
-//===---------------------------------------------------------------------===//
-// SSE2 Instructions
-//===---------------------------------------------------------------------===//
-
-// Move Instructions. Register-to-register movsd is not used for FR64
-// register copies because it's a partial register update; FsMOVAPDrr is
-// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movsd for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSDrr : SDI<0x10, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
-                  "movsd\t{$src2, $dst|$dst, $src2}",
-                  [(set (v2f64 VR128:$dst),
-                        (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
-
-// Extract the low 64-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
-          (MOVSDrr (v2f64 VR128:$src1),
-                   (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
-          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
-def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
-                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (loadf64 addr:$src))]>;
-
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
-            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+                          "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+                          "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                          SSEPackedSingle>, TB; /* PD SSE3 form is available */
}

-// Store scalar value to memory.
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
-                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(store FR64:$src, addr:$dst)]>;
-
-// Extract and store.
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
-                 addr:$dst),
-          (MOVSDmr addr:$dst,
-                   (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+/// SSE 2 Only

-// Conversion instructions
-def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+// Convert scalar double to scalar single
+let isAsmParserOnly = 1 in {
+def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+                       (ins FR64:$src1, FR64:$src2),
+                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                       VEX_4V;
+def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+                     (ins FR64:$src1, f64mem:$src2),
+                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+}
def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))]>;
@@ -1226,35 +695,28 @@ def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
                  Requires<[HasSSE2, OptForSize]>;
-def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
-                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
-                      [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SDrm  : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
-                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
-                      [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-def CVTPD2DQrm  : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                       "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                       "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrr  : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrm  : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def 
CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; -def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; -def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", []>; -def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", []>; - -// SSE2 instructions with XS prefix +let isAsmParserOnly = 1 in +defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, + int_x86_sse2_cvtsd2ss, f64mem, load, + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, + XS, VEX_4V; +let Constraints = "$src1 = $dst" in +defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, + int_x86_sse2_cvtsd2ss, f64mem, load, + "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS; + +// Convert scalar single to scalar double +let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix +def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), + (ins FR32:$src1, FR32:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, XS, Requires<[HasAVX]>, VEX_4V; +def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), + (ins FR32:$src1, f32mem:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; +} def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))]>, XS, @@ -1264,394 +726,51 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, Requires<[HasSSE2, OptForSize]>; -def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, - Requires<[HasSSE2, OptForSpeed]>; - -// Match intrinsics which expect XMM operand(s). -def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvtsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>; -def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), - "cvtsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_cvtsd2si - (load addr:$src)))]>; - -// Match intrinsics which expect MM and XMM operand(s). 
-def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; -def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi - (memop addr:$src)))]>; -def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>; -def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi - (memop addr:$src)))]>; -def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>; -def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd - (load addr:$src)))]>; - -// Aliases for intrinsics -def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse2_cvttsd2si VR128:$src))]>; -def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_cvttsd2si - (load addr:$src)))]>; - -// Comparison instructions -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in - def CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; - - // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1 in { - def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -let mayLoad = 1 in - def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -} -} - -let Defs = [EFLAGS] in { -def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>; -def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases to match intrinsics which expect XMM operand(s). 
-let Constraints = "$src1 = $dst" in { - def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, - (load addr:$src), imm:$cc))]>; -} - -let Defs = [EFLAGS] in { -def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - (load addr:$src2)))]>; - -def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - (load addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases of packed SSE2 instructions for scalar use. These all have names -// that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasSSE2]>, TB, OpSize; - -// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are -// disregarded. -let neverHasSideEffects = 1 in -def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; - -// Alias instruction to load FR64 from f128mem using movapd. Upper bits are -// disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1 in -def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; - -// Alias bitwise logical operations using SSE logical ops on packed FP values. 
-let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { - def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>; - def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>; - def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>; -} - -def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, - (memopfsf64 addr:$src2)))]>; -def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86for FR64:$src1, - (memopfsf64 addr:$src2)))]>; -def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, - (memopfsf64 addr:$src2)))]>; - -let neverHasSideEffects = 1 in { -def FsANDNPDrr : PDI<0x55, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", []>; -let mayLoad = 1 in -def FsANDNPDrm : PDI<0x55, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", []>; -} -} - -/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements unmodified (therefore these cannot be commuted). -/// -/// These three forms can each be reg+reg or reg+mem, so there are a total of -/// six "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F64Int, - bit Commutable = 0> { - // Scalar operation, reg+reg. - def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. - def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>; - - // Intrinsic operation, reg+mem. 
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, - sse_load_f64:$src2))]>; +def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + VR128:$src2))]>, XS, VEX_4V, + Requires<[HasAVX]>; +def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + (load addr:$src2)))]>, XS, VEX_4V, + Requires<[HasAVX]>; } +let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix +def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "cvtss2sd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + VR128:$src2))]>, XS, + Requires<[HasSSE2]>; +def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + "cvtss2sd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + (load addr:$src2)))]>, XS, + Requires<[HasSSE2]>; } -// Arithmetic instructions -defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>; -defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>; -defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>; -defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>; - -/// sse2_fp_binop_rm - Other SSE2 binops -/// -/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of -/// instructions for a full-vector intrinsic form. Operations that map -/// onto C operators don't use this form since they just use the plain -/// vector form instead of having a separate vector intrinsic form. -/// -/// This provides a total of eight "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F64Int, - Intrinsic V2F64Int, - bit Commutable = 0> { - - // Scalar operation, reg+reg. - def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. 
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } - - // Intrinsic operation, reg+mem. - def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, - sse_load_f64:$src2))]>; - - // Vector intrinsic operation, reg+reg. - def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } +def : Pat<(extloadf32 addr:$src), + (CVTSS2SDrr (MOVSSrm addr:$src))>, + Requires<[HasSSE2, OptForSpeed]>; - // Vector intrinsic operation, reg+mem. - def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, - (memopv2f64 addr:$src2)))]>; -} +// Convert doubleword to packed single/double fp +let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, + TB, VEX, Requires<[HasAVX]>; +def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps + (bitconvert (memopv2i64 addr:$src))))]>, + TB, VEX, Requires<[HasAVX]>; } - -defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax, - int_x86_sse2_max_sd, int_x86_sse2_max_pd>; -defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, - int_x86_sse2_min_sd, int_x86_sse2_min_pd>; - -//===---------------------------------------------------------------------===// -// SSE packed FP Instructions - -// Move Instructions -let neverHasSideEffects = 1 in -def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>; - -def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; - -let neverHasSideEffects = 1 in -def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1 in -def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (loadv2f64 addr:$src))]>; -def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(store (v2f64 VR128:$src), addr:$dst)]>; - -// Intrinsic forms of MOVUPD load and store -def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; -def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; - -let Constraints = "$src1 = $dst" in { - let AddedComplexity = 20 
in {
-    def MOVLPDrm : PDI<0x12, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movlpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (v2f64 (movlp VR128:$src1,
-                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
-    def MOVHPDrm : PDI<0x16, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movhpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (v2f64 (movlhps VR128:$src1,
-                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movlpd\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract (v2f64 VR128:$src),
-                                 (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movhpd\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract
-                                 (v2f64 (unpckh VR128:$src, (undef))),
-                                 (iPTR 0))), addr:$dst)]>;
-
-// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -1662,7 +781,18 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        (bitconvert (memopv2i64 addr:$src))))]>,
                      TB, Requires<[HasSSE2]>;

-// SSE2 instructions with XS prefix
+// FIXME: why is the non-intrinsic version described as SSE3?
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+                     XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+                        (bitconvert (memopv2i64 addr:$src))))]>,
+                     XS, VEX, Requires<[HasAVX]>;
+}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -1673,6 +803,29 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, Requires<[HasSSE2]>;

+// Convert packed single/double fp to doubleword
+let isAsmParserOnly = 1 in {
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+                        VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+                        (ins f128mem:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+                                            (memop addr:$src)))]>, VEX;
+}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
(int_x86_sse2_cvtps2dq VR128:$src))]>; @@ -1680,12 +833,54 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>; -// SSE2 packed instructions with XS prefix + +let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix +def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + XD, VEX, Requires<[HasAVX]>; +def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq + (memop addr:$src)))]>, + XD, VEX, Requires<[HasAVX]>; +} +def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + XD, Requires<[HasSSE2]>; +def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq + (memop addr:$src)))]>, + XD, Requires<[HasSSE2]>; + + +// Convert with truncation packed single/double fp to doubleword +let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix +def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +} def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; + +let isAsmParserOnly = 1 in { +def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))]>, + XS, VEX, Requires<[HasAVX]>; +def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memop addr:$src)))]>, + XS, VEX, Requires<[HasAVX]>; +} def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -1697,17 +892,18 @@ def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, Requires<[HasSSE2]>; -// SSE2 packed instructions with XD prefix -def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - XD, Requires<[HasSSE2]>; -def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))]>, - XD, Requires<[HasSSE2]>; - +let isAsmParserOnly = 1 in { +def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>, + VEX; +def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>, VEX; +} def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs 
VR128:$dst), (ins VR128:$src),
                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -1716,12 +912,31 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                          [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                             (memop addr:$src)))]>;

-// SSE2 instructions without OpSize prefix
+// Convert packed single to packed double
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+                    Requires<[HasAVX]>;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                    "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+                    Requires<[HasAVX]>;
+}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;

+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+                     VEX, Requires<[HasAVX]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+                                          (load addr:$src)))]>,
+                     VEX, Requires<[HasAVX]>;
+}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1732,12 +947,29 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                                          (load addr:$src)))]>,
                     TB, Requires<[HasSSE2]>;

+// Convert packed double to packed single
+let isAsmParserOnly = 1 in {
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+// FIXME: the memory form of this instruction should be described using
+// extra asm syntax
+}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;

+let isAsmParserOnly = 1 in {
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+                         (ins f128mem:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+                                            (memop addr:$src)))]>;
+}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1746,269 +978,1039 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                            (memop addr:$src)))]>;
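// Editorial note (not part of this patch): the reason the VEX memory form is
// deferred is that a bare memory operand is ambiguous under AVX, e.g.
//
//   vcvtpd2ps (%rax), %xmm0    // 128-bit or 256-bit source?
//
// so it is expected to need size-suffixed mnemonics (cvtpd2psx / cvtpd2psy in
// some assemblers) rather than the plain asm string used above.

-// Match intrinsics which expect XMM operand(s).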
-// Aliases for intrinsics
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+                            string asm, string asm_alt> {
+  def rr : SIi8<0xC2, MRMSrcReg,
+                (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+                asm, []>;
+  let mayLoad = 1 in
+  def rm : SIi8<0xC2, MRMSrcMem,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+                asm, []>;
+  // Accept explicit immediate argument form instead of comparison code.
+  let isAsmParserOnly = 1 in {
+    def rr_alt : SIi8<0xC2, MRMSrcReg,
+                  (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+                  asm_alt, []>;
+    let mayLoad = 1 in
+    def rm_alt : SIi8<0xC2, MRMSrcMem,
+                  (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+                  asm_alt, []>;
+  }
+}
+
+let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+  defm VCMPSS  : sse12_cmp_scalar<FR32, f32mem,
+                  "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+                  "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+                  XS, VEX_4V;
+  defm VCMPSD  : sse12_cmp_scalar<FR64, f64mem,
+                  "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+                  "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+                  XD, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+  defm CMPSS  : sse12_cmp_scalar<FR32, f32mem,
+                    "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+                    "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
+  defm CMPSD  : sse12_cmp_scalar<FR64, f64mem,
+                    "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+                    "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
+}
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+                                Intrinsic Int, string asm> {
+  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+                      (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+                      [(set VR128:$dst, (Int VR128:$src1,
+                                             VR128:$src, imm:$cc))]>;
+  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+                      (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm,
+                      [(set VR128:$dst, (Int VR128:$src1,
+                                             (load addr:$src), imm:$cc))]>;
+}
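// Illustrative expansion (editorial sketch, not part of this patch): for the
// SS instantiation below, and under its "$src1 = $dst" constraint,
// sse12_cmp_scalar_int yields, e.g., the register form
//
//   def Int_CMPSSrr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
//                          (ins VR128:$src1, VR128:$src, SSECC:$cc),
//                          "cmp${cc}ss\t{$src, $dst|$dst, $src}",
//                          [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
//                                             VR128:$src, imm:$cc))]>, XS;
//
// i.e. the whole XMM register flows through the node, so the untouched upper
// elements are preserved, matching the intrinsic's semantics.

+// Aliases to match intrinsics which expect XMM operand(s).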
+let isAsmParserOnly = 1 in { + defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, + "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">, + XS, VEX_4V; + defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, + "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">, + XD, VEX_4V; +} let Constraints = "$src1 = $dst" in { -def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR32:$src2), - "cvtsi2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1, - GR32:$src2))]>; -def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2), - "cvtsi2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1, - (loadi32 addr:$src2)))]>; -def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "cvtsd2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, - VR128:$src2))]>; -def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "cvtsd2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, - (load addr:$src2)))]>; -def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))]>, XS, - Requires<[HasSSE2]>; -def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), - "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))]>, XS, - Requires<[HasSSE2]>; + defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, + "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS; + defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, + "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD; +} + + +// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS +multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, + ValueType vt, X86MemOperand x86memop, + PatFrag ld_frag, string OpcodeStr, Domain d> { + def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>; + def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (vt RC:$src1), + (ld_frag addr:$src2)))], d>; +} + +let Defs = [EFLAGS] in { + let isAsmParserOnly = 1 in { + defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, VEX; + defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, OpSize, VEX; + let Pattern = []<dag> in { + defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, VEX; + defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, OpSize, VEX; + } + + defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, VEX; + defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + + defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, + load, "comiss", SSEPackedSingle>, VEX; + defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, + load, 
"comisd", SSEPackedDouble>, OpSize, VEX; + } + defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, TB; + defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, TB, OpSize; + + let Pattern = []<dag> in { + defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; + } + + defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, TB; + defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, TB, OpSize; + + defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; +} // Defs = [EFLAGS] + +// sse12_cmp_packed - sse 1 & 2 compared packed instructions +multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, + Intrinsic Int, string asm, string asm_alt, + Domain d> { + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; + // Accept explicit immediate argument form instead of comparison code. + let isAsmParserOnly = 1 in { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, [], d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + asm_alt, [], d>; + } +} + +let isAsmParserOnly = 1 in { + defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; + defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; +} +let Constraints = "$src1 = $dst" in { + defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $dst|$dst, $src}", + "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", + SSEPackedSingle>, TB; + defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $dst|$dst, $src}", + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", + SSEPackedDouble>, TB, OpSize; +} + +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Shuffle Instructions +//===----------------------------------------------------------------------===// + +/// sse12_shuffle - sse 1 & 2 shuffle instructions 
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+                         ValueType vt, string asm, PatFrag mem_frag,
+                         Domain d, bit IsConvertibleToThreeAddress = 0> {
+  def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
+                   (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
+                   [(set VR128:$dst, (vt (shufp:$src3
+                            VR128:$src1, (mem_frag addr:$src2))))], d>;
+  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+  def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
+                   (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
+                   [(set VR128:$dst,
+                            (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+}
+
+let isAsmParserOnly = 1 in {
+  defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+             "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+             memopv4f32, SSEPackedSingle>, VEX_4V;
+  defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+             "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+             memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+                    TB;
+  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+                                   PatFrag mem_frag, RegisterClass RC,
+                                   X86MemOperand x86memop, string asm,
+                                   Domain d> {
+    def rr : PI<opc, MRMSrcReg,
+                (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                asm, [(set RC:$dst,
+                           (vt (OpNode RC:$src1, RC:$src2)))], d>;
+    def rm : PI<opc, MRMSrcMem,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+                asm, [(set RC:$dst,
+                           (vt (OpNode RC:$src1,
+                                       (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+  let isAsmParserOnly = 1 in {
+    defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+          VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedSingle>, VEX_4V;
+    defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+          VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedDouble>, OpSize, VEX_4V;
+    defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+          VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedSingle>, VEX_4V;
+    defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+          VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedDouble>, OpSize, VEX_4V;
+
+    defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+          VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedSingle>, VEX_4V;
+    defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+          VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedDouble>, OpSize, VEX_4V;
+    defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+          VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedSingle>, VEX_4V;
+    defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+          VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+          SSEPackedDouble>, OpSize, VEX_4V;
+  }
+
+  let Constraints = "$src1 = $dst" in {
+    defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+          VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+          SSEPackedSingle>, TB;
+    defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+          VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+          SSEPackedDouble>, TB, OpSize;
+    defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+          VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+          SSEPackedSingle>, TB;
+    defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+          VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+          SSEPackedDouble>, TB, OpSize;
+  } // Constraints = "$src1 = $dst"
+} // AddedComplexity
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 sign mask extraction
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+                                Domain d> {
+  def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+              [(set GR32:$dst, (Int RC:$src))], d>;
+}
+
+// Mask creation
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+                                     SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+                                     SSEPackedDouble>, TB, OpSize;
+
+let isAsmParserOnly = 1 in {
+  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+                                        "movmskps", SSEPackedSingle>, VEX;
+  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+                                        "movmskpd", SSEPackedDouble>, OpSize,
+                                        VEX;
+  // FIXME: merge with multiclass above when the intrinsics come.
+  def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+             "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+  def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+             "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+             VEX;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+    canFoldAsLoad = 1 in {
+  // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                 [(set FR32:$dst, fp32imm0)]>,
+                 Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                 [(set FR64:$dst, fpimm0)]>,
+                 Requires<[HasSSE2]>, TB, OpSize;
}

-// Arithmetic

+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                     "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+                     "movapd\t{$src, $dst|$dst, $src}", []>;
+}
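// Editorial note (not part of this patch): FsMOVAPSrr/FsMOVAPDrr give the
// register allocator a full-width reg-to-reg copy for scalar values, e.g.
//
//   movaps %xmm1, %xmm0
//
// Copying all 128 bits is safe here because an FR32/FR64 value only ever
// lives in the low element, so whatever lands in the upper bits is ignored.

+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded.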
+let canFoldAsLoad = 1, isReMaterializable = 1 in { +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Logical Instructions +//===----------------------------------------------------------------------===// + +/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops +/// +multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + let isAsmParserOnly = 1 in { + defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, + FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V; + + defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, + FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V; + } + + let Constraints = "$src1 = $dst" in { + defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, + f32, f128mem, memopfsf32, SSEPackedSingle>, TB; + + defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64, + f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize; + } +} + +// Alias bitwise logical operations using SSE logical ops on packed FP values. +let mayLoad = 0 in { + defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>; + defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>; + defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>; +} + +let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in + defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>; + +/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// +multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, + SDNode OpNode, int HasPat = 0, + list<list<dag>> Pattern = []> { + let isAsmParserOnly = 1, Pattern = []<dag> in { + defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f128mem, + !if(HasPat, Pattern[0], // rr + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, + VR128:$src2)))]), + !if(HasPat, Pattern[2], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]), 0>, + VEX_4V; + + defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f128mem, + !if(HasPat, Pattern[1], // rr + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 + VR128:$src2))))]), + !if(HasPat, Pattern[3], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]), 0>, + OpSize, VEX_4V; + } + let Constraints = "$src1 = $dst" in { + defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f128mem, + !if(HasPat, Pattern[0], // rr + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, + VR128:$src2)))]), + !if(HasPat, Pattern[2], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))])>, TB; + + defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f128mem, + !if(HasPat, Pattern[1], // rr + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 + VR128:$src2))))]), + !if(HasPat, Pattern[3], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + 
(memopv2i64 addr:$src2)))])>, + TB, OpSize; + } +} + +/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms +/// +let isAsmParserOnly = 1 in { +multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> { + defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V; + + defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V; +} +} + +// AVX 256-bit packed logical ops forms +defm VAND : sse12_fp_packed_logical_y<0x54, "and">; +defm VOR : sse12_fp_packed_logical_y<0x56, "or">; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">; +let isCommutable = 0 in + defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">; + +defm AND : sse12_fp_packed_logical<0x54, "and", and>; +defm OR : sse12_fp_packed_logical<0x56, "or", or>; +defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; +let isCommutable = 0 in + defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ + // single r+r + [(set VR128:$dst, (v2i64 (and (xor VR128:$src1, + (bc_v2i64 (v4i32 immAllOnesV))), + VR128:$src2)))], + // double r+r + [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), + (bc_v2i64 (v2f64 VR128:$src2))))], + // single r+m + [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)), + (bc_v2i64 (v4i32 immAllOnesV))), + (memopv2i64 addr:$src2))))], + // double r+m + [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), + (memopv2i64 addr:$src2)))]]>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Arithmetic Instructions +//===----------------------------------------------------------------------===// + +/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and +/// vector forms. +/// +/// In addition, we also have a special variant of the scalar form here to +/// represent the associated intrinsic operation. This form is unlike the +/// plain scalar form, in that it takes an entire vector (instead of a scalar) +/// and leaves the top elements unmodified (therefore these cannot be commuted). +/// +/// These three forms can each be reg+reg or reg+mem. 
+/// +multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, + bit Is2Addr = 1> { + defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), + OpNode, FR32, f32mem, Is2Addr>, XS; + defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), + OpNode, FR64, f64mem, Is2Addr>, XD; +} + +multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, + bit Is2Addr = 1> { + let mayLoad = 0 in { + defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, + v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB; + defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, + v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize; + } +} + +multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + let mayLoad = 0 in { + defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, + v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB; + defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, + v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize; + } +} + +multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, + bit Is2Addr = 1> { + defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS; + defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD; +} + +multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr, + bit Is2Addr = 1> { + defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32, + SSEPackedSingle, Is2Addr>, TB; + + defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64, + SSEPackedDouble, Is2Addr>, TB, OpSize; +} + +// Binary Arithmetic instructions +let isAsmParserOnly = 1 in { + defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; + defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; + + let isCommutable = 0 in { + defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; + defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; + defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V; + defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; + } +} + +let Constraints = "$src1 = $dst" in { + defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>, + basic_sse12_fp_binop_p<0x58, "add", fadd>, + basic_sse12_fp_binop_s_int<0x58, "add">; + defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>, + basic_sse12_fp_binop_p<0x59, "mul", fmul>, + basic_sse12_fp_binop_s_int<0x59, "mul">; + + let isCommutable = 0 in { + defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub>, + basic_sse12_fp_binop_s_int<0x5C, "sub">; + defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>, + 
basic_sse12_fp_binop_p<0x5E, "div", fdiv>, + basic_sse12_fp_binop_s_int<0x5E, "div">; + defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_s_int<0x5F, "max">, + basic_sse12_fp_binop_p_int<0x5F, "max">; + defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin>, + basic_sse12_fp_binop_s_int<0x5D, "min">, + basic_sse12_fp_binop_p_int<0x5D, "min">; + } +} + +/// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the /// plain scalar form, in that it takes an entire vector (instead of a /// scalar) and leaves the top elements undefined. /// /// And, we have a special variant form for a full-vector intrinsic form. -/// -/// These four forms can each have a reg or a mem operand, so there are a -/// total of eight "instructions". -/// -multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F64Int, - Intrinsic V2F64Int, - bit Commutable = 0> { - // Scalar operation, reg. + +/// sse1_fp_unop_s - SSE1 unops in scalar form. +multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F32Int> { + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode FR32:$src))]>; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS, + Requires<[HasSSE1, OptForSize]>; + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F32Int VR128:$src))]>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; +} + +/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. +multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F32Int> { + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, XS, Requires<[HasAVX, OptForSize]>; + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; +} + +/// sse1_fp_unop_p - SSE1 unops in packed form. 
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> { + def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>; + def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>; +} + +/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form. +multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> { + def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>; + def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>; +} + +/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. +multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, + Intrinsic V4F32Int> { + def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (V4F32Int VR128:$src))]>; + def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>; +} + + +/// sse2_fp_unop_s - SSE2 unops in scalar form. +multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F64Int> { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))]> { - let isCommutable = Commutable; - } - - // Scalar operation, mem. - def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), + [(set FR64:$dst, (OpNode FR64:$src))]>; + // See the comments in sse1_fp_unop_s for why this is OptForSize. + def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode (load addr:$src)))]>; + [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD, + Requires<[HasSSE2, OptForSize]>; + def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F64Int VR128:$src))]>; + def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; +} + +/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. +multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F64Int> { + def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst), + (ins FR64:$src1, f64mem:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, sdmem:$src2), + !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} - // Vector operation, reg. +/// sse2_fp_unop_p - SSE2 unops in vector forms. 
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, + SDNode OpNode> { def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> { - let isCommutable = Commutable; - } - - // Vector operation, mem. + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>; +} - // Intrinsic operation, reg. - def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))]> { - let isCommutable = Commutable; - } - - // Intrinsic operation, mem. - def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; +/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms. +multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> { + def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>; + def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>; +} - // Vector intrinsic operation, reg +/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms. +multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr, + Intrinsic V2F64Int> { def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V2F64Int VR128:$src))]> { - let isCommutable = Commutable; - } - - // Vector intrinsic operation, mem + [(set VR128:$dst, (V2F64Int VR128:$src))]>; def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // Square root. + defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, + sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + VEX_4V; + + defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>, + sse2_fp_unop_p<0x51, "vsqrt", fsqrt>, + sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + VEX; + + // Reciprocal approximations. Note that these typically require refinement + // in order to obtain suitable precision. + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt, + int_x86_sse_rsqrt_ss>, VEX_4V; + defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>, + sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX; + + defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, + VEX_4V; + defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>, + sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX; +} + // Square root. 
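Before the square-root and reciprocal defs that follow: the "requires refinement" comment attached to the reciprocal approximations (above for AVX, repeated below for the SSE forms) is the usual caveat that RSQRTPS and RCPPS return roughly 12-bit estimates, and one Newton-Raphson step recovers close to full single precision. A minimal sketch with standard SSE intrinsics (the helper name is made up; not part of this patch):

#include <xmmintrin.h>  // SSE1
#include <cstdio>

// One Newton-Raphson step on the RSQRTPS estimate:
//   x' = x * (1.5 - 0.5 * a * x * x)
static __m128 rsqrt_refined(__m128 a) {
  __m128 x   = _mm_rsqrt_ps(a);                  // ~12-bit estimate
  __m128 axx = _mm_mul_ps(_mm_mul_ps(a, x), x);  // a * x * x
  return _mm_mul_ps(x, _mm_sub_ps(_mm_set1_ps(1.5f),
                                  _mm_mul_ps(_mm_set1_ps(0.5f), axx)));
}

int main() {
  float out[4];
  _mm_storeu_ps(out, rsqrt_refined(_mm_set1_ps(4.0f)));
  printf("%f\n", out[0]);  // ~0.5
}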
-defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt, - int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>; +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt>, + sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt>, + sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>; + +// Reciprocal approximations. Note that these typically require refinement +// in order to obtain suitable precision. +defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>, + sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>; +defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, + sse1_fp_unop_p<0x53, "rcp", X86frcp>, + sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>; // There is no f64 version of the reciprocal approximation instructions. -// Logical -let Constraints = "$src1 = $dst" in { - let isCommutable = 1 in { - def ANDPDrr : PDI<0x54, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - def ORPDrr : PDI<0x56, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (or (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - def XORPDrr : PDI<0x57, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (xor (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - } +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Non-temporal stores +//===----------------------------------------------------------------------===// - def ANDPDrm : PDI<0x54, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ORPDrm : PDI<0x56, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (or (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def XORPDrm : PDI<0x57, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (xor (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ANDNPDrr : PDI<0x55, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - def ANDNPDrm : PDI<0x55, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (memopv2i64 addr:$src2)))]>; +let isAsmParserOnly = 1 in { + def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX; + def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX; + + let ExeDomain = SSEPackedInt in + def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), 
+ "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX; + + let AddedComplexity = 400 in { // Prefer non-temporal versions + def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)]>, VEX; + let ExeDomain = SSEPackedInt in + def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; + + def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; + def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)]>, VEX; + def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)]>, VEX; + let ExeDomain = SSEPackedInt in + def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; + } } -let Constraints = "$src1 = $dst" in { - def CMPPDrri : PDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, - VR128:$src, imm:$cc))]>; - def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), - "cmp${cc}pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, - (memop addr:$src), imm:$cc))]>; +def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; + +let ExeDomain = SSEPackedInt in +def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; + +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; + +def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; + +let ExeDomain = SSEPackedInt in +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + 
"movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +// There is no AVX form for instructions below this point +def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; + +def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; - // Accept explicit immediate argument form instead of comparison code. -let isAsmParserOnly = 1 in { - def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), - "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; - def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), - "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; } +def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, + TB, Requires<[HasSSE2]>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Misc Instructions (No AVX form) +//===----------------------------------------------------------------------===// + +// Prefetch intrinsic. +def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; +def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; +def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; +def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; + +// Load, store, and memory fence +def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, + TB, Requires<[HasSSE1]>; + +// Alias instructions that map zero vector to pxor / xorp* for sse. +// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-zeros value if folding it would be beneficial. +// FIXME: Change encoding to pseudo! 
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1 in { +def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4f32 immAllZerosV))]>; +def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v2f64 immAllZerosV))]>; +let ExeDomain = SSEPackedInt in +def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllZerosV))]>; } -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; -// Shuffle and unpack instructions -let Constraints = "$src1 = $dst" in { - def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i8imm:$src3), - "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v2f64 (shufp:$src3 - VR128:$src1, (memopv2f64 addr:$src2))))]>; +def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; - let AddedComplexity = 10 in { - def UNPCKHPDrr : PDI<0x15, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpckhpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPDrm : PDI<0x15, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpckhpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (unpckh VR128:$src1, - (memopv2f64 addr:$src2))))]>; - - def UNPCKLPDrr : PDI<0x14, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpcklpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPDrm : PDI<0x14, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpcklpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>; - } // AddedComplexity -} // Constraints = "$src1 = $dst" +def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Load/Store XCSR register +//===----------------------------------------------------------------------===// +let isAsmParserOnly = 1 in { + def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), + "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX; + def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), + "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX; +} + +def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), + "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>; +def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), + "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>; //===---------------------------------------------------------------------===// -// SSE integer instructions -let ExeDomain = SSEPackedInt in { +// SSE2 - Move Aligned/Unaligned Packed Integer Instructions +//===---------------------------------------------------------------------===// +let ExeDomain = SSEPackedInt in { // SSE integer instructions + +let isAsmParserOnly = 1 in { + 
let neverHasSideEffects = 1 in + def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + + let canFoldAsLoad = 1, mayLoad = 1 in { + def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>, + VEX; + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, + XS, VEX, Requires<[HasAVX]>; + } + + let mayStore = 1 in { + def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX; + def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, + XS, VEX, Requires<[HasAVX]>; + } +} -// Move Instructions let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, mayLoad = 1 in + +let canFoldAsLoad = 1, mayLoad = 1 in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>; -let mayStore = 1 in -def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; -let canFoldAsLoad = 1, mayLoad = 1 in def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, XS, Requires<[HasSSE2]>; -let mayStore = 1 in +} + +let mayStore = 1 in { +def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, XS, Requires<[HasSSE2]>; +} // Intrinsic forms of MOVDQU load and store +let isAsmParserOnly = 1 in { +let canFoldAsLoad = 1 in +def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, + XS, VEX, Requires<[HasAVX]>; +def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, + XS, VEX, Requires<[HasAVX]>; +} + let canFoldAsLoad = 1 in def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", @@ -2019,55 +2021,72 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, XS, Requires<[HasSSE2]>; -let Constraints = "$src1 = $dst" in { +} // ExeDomain = SSEPackedInt + +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Arithmetic Instructions +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { // SSE integer instructions multiclass 
PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, - bit Commutable = 0> { + bit IsCommutable = 0, bit Is2Addr = 1> { + let isCommutable = IsCommutable in def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 - addr:$src2))))]>; + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))))]>; } multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, - string OpcodeStr, - Intrinsic IntId, Intrinsic IntId2> { + string OpcodeStr, Intrinsic IntId, + Intrinsic IntId2, bit Is2Addr = 1> { def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>; def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; + (ins VR128:$src1, i32i8imm:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; } /// PDI_binop_rm - Simple SSE2 binary operator. 
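Each of these binop multiclasses expands to an rr form, with both operands in registers, and an rm form whose second operand is folded from memory. Roughly, at the intrinsics level (standard SSE2 intrinsics; the function names are made up for illustration):

#include <emmintrin.h>
#include <cstdio>

__m128i paddw_rr(__m128i a, __m128i b) {
  return _mm_add_epi16(a, b);                  // both operands in registers
}
__m128i paddw_rm(__m128i a, const __m128i *p) {
  // The load feeds straight into the add, so instruction selection can
  // fold it into the memory form of PADDW.
  return _mm_add_epi16(a, _mm_load_si128(p));
}

int main() {
  __m128i one = _mm_set1_epi16(1), two = _mm_set1_epi16(2);
  short out[8];
  _mm_storeu_si128((__m128i *)out, paddw_rr(one, two));
  printf("%d\n", (int)out[0]);                 // prints 3
}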
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { + ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> { + let isCommutable = IsCommutable in def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, (bitconvert (memopv2i64 addr:$src2)))))]>; } @@ -2077,64 +2096,177 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, /// to collapse (bitconvert VT to VT) into its operand. /// multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit Commutable = 0> { + bit IsCommutable = 0, bit Is2Addr = 1> { + let isCommutable = IsCommutable in def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, - (memopv2i64 addr:$src2)))]>; + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>; } -} // Constraints = "$src1 = $dst" } // ExeDomain = SSEPackedInt // 128-bit Integer Arithmetic -defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>; -defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>; -defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>; -defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; - -defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>; -defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>; -defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>; -defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V; +defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V; +defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V; +defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V; +defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V; +defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V; +defm 
VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V; +defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V; +defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V; + +// Intrinsic forms +defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>, + VEX_4V; +defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>, + VEX_4V; +defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>, + VEX_4V; +defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>, + VEX_4V; +defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>, + VEX_4V; +defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>, + VEX_4V; +defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>, + VEX_4V; +defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>, + VEX_4V; +defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>, + VEX_4V; +defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>, + VEX_4V; +defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>, + VEX_4V; +defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>, + VEX_4V; +defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>, + VEX_4V; +defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>, + VEX_4V; +defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>, + VEX_4V; +defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>, + VEX_4V; +defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>, + VEX_4V; +defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>, + VEX_4V; +defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>, + VEX_4V; +} +let Constraints = "$src1 = $dst" in { +defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>; +defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>; +defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>; +defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; +defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>; defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>; defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>; defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>; defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>; +// Intrinsic forms defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>; defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>; defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>; defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>; - -defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>; - +defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>; +defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>; +defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>; +defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>; defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>; -defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>; +defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>; defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>; - defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>; +defm 
PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; +defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>; +defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>; +defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>; +defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>; +defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; +defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; + +} // Constraints = "$src1 = $dst" -defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; -defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Logical Instructions +//===---------------------------------------------------------------------===// +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", + int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>, + VEX_4V; +defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", + int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>, + VEX_4V; +defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", + int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>, + VEX_4V; + +defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>, + VEX_4V; +defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>, + VEX_4V; +defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>, + VEX_4V; + +defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", + int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>, + VEX_4V; +defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", + int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>, + VEX_4V; + +defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V; +defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V; +defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V; -defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>; -defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>; -defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>; -defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; -defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; +let ExeDomain = SSEPackedInt in { + let neverHasSideEffects = 1 in { + // 128-bit logical shifts. + def VPSLLDQri : PDIi8<0x73, MRM7r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + def VPSRLDQri : PDIi8<0x73, MRM3r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + // PSRADQri doesn't exist in SSE[1-3]. 
+ } + def VPANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + VR128:$src2)))]>, VEX_4V; + def VPANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + (memopv2i64 addr:$src2))))]>, + VEX_4V; +} +} +let Constraints = "$src1 = $dst" in { defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w, int_x86_sse2_pslli_w>; defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", @@ -2154,17 +2286,34 @@ defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d, int_x86_sse2_psrai_d>; -// 128-bit logical shifts. -let Constraints = "$src1 = $dst", neverHasSideEffects = 1, - ExeDomain = SSEPackedInt in { - def PSLLDQri : PDIi8<0x73, MRM7r, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - "pslldq\t{$src2, $dst|$dst, $src2}", []>; - def PSRLDQri : PDIi8<0x73, MRM3r, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - "psrldq\t{$src2, $dst|$dst, $src2}", []>; - // PSRADQri doesn't exist in SSE[1-3]. +defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; +defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>; +defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; + +let ExeDomain = SSEPackedInt in { + let neverHasSideEffects = 1 in { + // 128-bit logical shifts. + def PSLLDQri : PDIi8<0x73, MRM7r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "pslldq\t{$src2, $dst|$dst, $src2}", []>; + def PSRLDQri : PDIi8<0x73, MRM3r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "psrldq\t{$src2, $dst|$dst, $src2}", []>; + // PSRADQri doesn't exist in SSE[1-3]. 
+ } + def PANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + VR128:$src2)))]>; + + def PANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + (memopv2i64 addr:$src2))))]>; } +} // Constraints = "$src1 = $dst" let Predicates = [HasSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), @@ -2185,32 +2334,33 @@ let Predicates = [HasSSE2] in { (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; } -// Logical -defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; -defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>; -defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; - -let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in { - def PANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - VR128:$src2)))]>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Comparison Instructions +//===---------------------------------------------------------------------===// - def PANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - (memopv2i64 addr:$src2))))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1, + 0>, VEX_4V; + defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1, + 0>, VEX_4V; + defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1, + 0>, VEX_4V; + defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0, + 0>, VEX_4V; + defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0, + 0>, VEX_4V; + defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0, + 0>, VEX_4V; } -// SSE2 Integer comparison -defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>; -defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>; -defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>; -defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>; -defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>; -defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; +let Constraints = "$src1 = $dst" in { + defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>; + defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>; + defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>; + defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>; + defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>; + defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; +} // Constraints = "$src1 = $dst" def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), (PCMPEQBrr VR128:$src1, VR128:$src2)>; @@ -2238,94 +2388,147 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), (PCMPGTDrm VR128:$src1, addr:$src2)>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Pack Instructions 
+//===---------------------------------------------------------------------===// -// Pack instructions +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, + 0, 0>, VEX_4V; +defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, + 0, 0>, VEX_4V; +defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, + 0, 0>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>; defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; +} // Constraints = "$src1 = $dst" + +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Shuffle Instructions +//===---------------------------------------------------------------------===// let ExeDomain = SSEPackedInt in { +multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag, + PatFrag bc_frag> { +def ri : Ii8<0x70, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1, + (undef))))]>; +def mi : Ii8<0x70, MRMSrcMem, + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (vt (pshuf_frag:$src2 + (bc_frag (memopv2i64 addr:$src1)), + (undef))))]>; +} +} // ExeDomain = SSEPackedInt -// Shuffle and unpack instructions -let AddedComplexity = 5 in { -def PSHUFDri : PDIi8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 - VR128:$src1, (undef))))]>; -def PSHUFDmi : PDIi8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 - (bc_v4i32 (memopv2i64 addr:$src1)), - (undef))))]>; -} - -// SSE2 with ImmT == Imm8 and XS prefix. -def PSHUFHWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1, - (undef))))]>, - XS, Requires<[HasSSE2]>; -def PSHUFHWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, - XS, Requires<[HasSSE2]>; - -// SSE2 with ImmT == Imm8 and XD prefix. -def PSHUFLWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1, - (undef))))]>, - XD, Requires<[HasSSE2]>; -def PSHUFLWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, - XD, Requires<[HasSSE2]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let AddedComplexity = 5 in + defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize, + VEX; + // SSE2 with ImmT == Imm8 and XS prefix. 
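Before the pshufhw/pshuflw variants, it may help to spell out the imm8 operand these shuffles take: four 2-bit fields, each selecting a source element for one result lane. A sketch with standard SSE2 intrinsics (illustration only, not part of this patch):

#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128i v = _mm_setr_epi32(10, 11, 12, 13);
  // _MM_SHUFFLE(3,2,1,0) is the identity; (0,1,2,3) reverses the dwords.
  __m128i r = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));
  int out[4];
  _mm_storeu_si128((__m128i *)out, r);
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // 13 12 11 10
}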
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS, + VEX; -let Constraints = "$src1 = $dst" in { - def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", + // SSE2 with ImmT == Imm8 and XD prefix. + defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD, + VEX; +} + +let Predicates = [HasSSE2] in { + let AddedComplexity = 5 in + defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize; + + // SSE2 with ImmT == Imm8 and XS prefix. + defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS; + + // SSE2 with ImmT == Imm8 and XD prefix. + defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD; +} + +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Unpack Instructions +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { +multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, + PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> { + def rr : PDI<opc, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>; + def rm : PDI<opc, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (unp_frag VR128:$src1, + (bc_frag (memopv2i64 + addr:$src2))))]>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8, + 0>, VEX_4V; + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16, + 0>, VEX_4V; + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32, + 0>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
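+  /// A hypothetical instantiation such as
+  ///   sse2_unpack<0x6C, "vpunpcklqdq", v2i64, unpckl, bc_v2i64, 0>
+  /// would give the memory form the pattern
+  ///   (unpckl VR128:$src1, (bc_v2i64 (memopv2i64 addr:$src2)))
+  /// and the v2i64-to-v2i64 bitconvert is never folded away, so the pattern
+  /// would not match; the qdq forms below are therefore written by hand.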
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", + (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V; + def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", + (v2i64 (unpckl VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; + + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8, + 0>, VEX_4V; + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16, + 0>, VEX_4V; + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32, + 0>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. + def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, + (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V; + def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + (v2i64 (unpckh VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>; + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>; + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", @@ -2338,39 +2541,12 @@ let Constraints = "$src1 = $dst" in { (v2i64 (unpckl VR128:$src1, (memopv2i64 addr:$src2))))]>; - def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>; + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>; + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "punpckhqdq\t{$src2, $dst|$dst, $src2}",
@@ -2384,102 +2560,117 @@ let Constraints = "$src1 = $dst" in {
                         (memopv2i64 addr:$src2))))]>;
}

-// Extract / Insert
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+  def rri : Ii8<0xC4, MRMSrcReg,
+       (outs VR128:$dst), (ins VR128:$src1,
+        GR32:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+  def rmi : Ii8<0xC4, MRMSrcMem,
+                       (outs VR128:$dst), (ins VR128:$src1,
+                        i16mem:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+                    imm:$src3))]>;
+}
+
+// Extract
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+                    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+                                                imm:$src2))]>, OpSize, VEX;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                    "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
                                                imm:$src2))]>;
-let Constraints = "$src1 = $dst" in {
-  def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        GR32:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
-  def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        i16mem:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
-                                    imm:$src3))]>;
-}
-// Mask creation
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+// Insert
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+  defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
-// Conditional store
-let Uses = [EDI] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                     "maskmovdqu\t{$mask, $src|$src, $mask}",
-                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
-
-let Uses = [RDI] in
-def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                     "maskmovdqu\t{$mask, $src|$src, $mask}",
-                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Constraints = "$src1 = $dst" in
+  defm PINSRW : sse2_pinsrw, TB, OpSize;
} // ExeDomain = SSEPackedInt

-// Non-temporal stores
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                        "movntpd\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                        "movntdq\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins 
i32mem:$dst, GR32:$src), - "movnti\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, - TB, Requires<[HasSSE2]>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Mask Creation +//===---------------------------------------------------------------------===// -let AddedComplexity = 400 in { // Prefer non-temporal versions -def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; +let ExeDomain = SSEPackedInt in { -let ExeDomain = SSEPackedInt in -def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; -} +let isAsmParserOnly = 1 in +def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; +def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; -// Flush cache -def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), - "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, - TB, Requires<[HasSSE2]>; +} // ExeDomain = SSEPackedInt -// Load, store, and memory fence -def LFENCE : I<0xAE, MRM_E8, (outs), (ins), - "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM_F0, (outs), (ins), - "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; +//===---------------------------------------------------------------------===// +// SSE2 - Conditional Store +//===---------------------------------------------------------------------===// -// Pause. This "instruction" is encoded as "rep; nop", so even though it -// was introduced with SSE2, it's backward compatible. -def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; +let ExeDomain = SSEPackedInt in { -//TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 0)), (NOOP)>; -def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; -def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 1)), (MFENCE)>; +let isAsmParserOnly = 1 in { +let Uses = [EDI] in +def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), + (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX; +let Uses = [RDI] in +def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), + (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX; +} -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-ones value if folding it would be beneficial. -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in - // FIXME: Change encoding to pseudo. 
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; +let Uses = [EDI] in +def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>; +let Uses = [RDI] in +def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; + +} // ExeDomain = SSEPackedInt + +//===---------------------------------------------------------------------===// +// SSE2 - Move Doubleword +//===---------------------------------------------------------------------===// +// Move Int Doubleword to Packed Double Int +let isAsmParserOnly = 1 in { +def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector GR32:$src)))]>, VEX; +def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, + VEX; +} def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2489,6 +2680,18 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; + +// Move Int Doubleword to Single Scalar +let isAsmParserOnly = 1 in { +def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; + +def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, + VEX; +} def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>; @@ -2497,20 +2700,18 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; -// SSE2 instructions with XS prefix -def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "movq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, - Requires<[HasSSE2]>; -def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))), addr:$dst)]>; - -def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; - +// Move Packed Doubleword Int to Packed Double Int +let isAsmParserOnly = 1 in { +def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + (iPTR 0)))]>, VEX; +def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (vector_extract (v4i32 VR128:$src), + (iPTR 0))), addr:$dst)]>, VEX; +} def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2520,6 +2721,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, 
VR128:$src), [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>; +// Move Scalar Single to Double Int +let isAsmParserOnly = 1 in { +def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; +def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX; +} def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>; @@ -2527,25 +2737,38 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; -// Store / copy lower 64-bits of a XMM register. -def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; - // movd / movq to XMM register zero-extends +let AddedComplexity = 15, isAsmParserOnly = 1 in { +def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector GR32:$src)))))]>, + VEX; +def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only + [(set VR128:$dst, (v2i64 (X86vzmovl + (v2i64 (scalar_to_vector GR64:$src)))))]>, + VEX, VEX_W; +} let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))))]>; -// This is X86-64 only. 
def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))))]>; } let AddedComplexity = 20 in { +let isAsmParserOnly = 1 in +def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (X86vzmovl (v4i32 (scalar_to_vector + (loadi32 addr:$src))))))]>, + VEX; def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2558,13 +2781,63 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; +} +//===---------------------------------------------------------------------===// +// SSE2 - Move Quadword +//===---------------------------------------------------------------------===// + +// Move Quadword Int to Packed Quadword Int +let isAsmParserOnly = 1 in +def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, + VEX, Requires<[HasAVX]>; +def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, + Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix + +// Move Packed Quadword Int to Quadword Int +let isAsmParserOnly = 1 in +def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))), addr:$dst)]>, VEX; +def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))), addr:$dst)]>; + +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + +// Store / copy lower 64-bits of a XMM register. +let isAsmParserOnly = 1 in +def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; +def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; + +let AddedComplexity = 20, isAsmParserOnly = 1 in +def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (X86vzmovl (v2i64 (scalar_to_vector + (loadi64 addr:$src))))))]>, + XS, VEX, Requires<[HasAVX]>; + +let AddedComplexity = 20 in { def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector - (loadi64 addr:$src))))))]>, XS, - Requires<[HasSSE2]>; + (loadi64 addr:$src))))))]>, + XS, Requires<[HasSSE2]>; def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; @@ -2575,12 +2848,23 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. 
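// For example, with xmm2 = <a, b>, "movq xmm1, xmm2" leaves xmm1 = <a, 0>;
// this implicit zero-extension is what the X86vzmovl patterns below rely on.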
+let isAsmParserOnly = 1, AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+                      XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                      XS, Requires<[HasSSE2]>;
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl
+                                             (loadv2i64 addr:$src))))]>,
+                      XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "movq\t{$src, $dst|$dst, $src}",
@@ -2592,49 +2876,136 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
                                             (MOVZPQILo2PQIrm addr:$src)>;
}

+// Instructions to match in the assembler
+let isAsmParserOnly = 1 in {
+// These instructions are in fact aliases of movd with 64-bit operands
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+}
+
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let isAsmParserOnly = 1 in
+def VMOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 "movq\t{$src, $dst|$dst, $src}", []>, XS;

//===---------------------------------------------------------------------===//
-// SSE3 Instructions
+// SSE2 - Misc Instructions
//===---------------------------------------------------------------------===//

-// Move Instructions
-def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movshdup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (movshdup
-                                                VR128:$src, (undef))))]>;
-def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "movshdup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (movshdup
-                                         (memopv4f32 addr:$src), (undef)))]>;
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+              TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
+//TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+           (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+           (i8 1)), (MFENCE)>;
+
+// Alias instruction that maps an all-ones vector to pcmpeqd.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial. 
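+// (The 0x76 encoding below is pcmpeqd with the destination compared against
+// itself; every lane trivially compares equal, so all result bits are set.)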
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, ExeDomain = SSEPackedInt in + // FIXME: Change encoding to pseudo. + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; -def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (movsldup +//===---------------------------------------------------------------------===// +// SSE3 - Conversion Instructions +//===---------------------------------------------------------------------===// + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + +def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; + +//===---------------------------------------------------------------------===// +// SSE3 - Move Instructions +//===---------------------------------------------------------------------===// + +// Replicate Single FP +multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> { +def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v4f32 (rep_frag VR128:$src, (undef))))]>; -def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (movsldup +def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (rep_frag (memopv4f32 addr:$src), (undef)))]>; +} -def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movddup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; -def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movddup\t{$src, $dst|$dst, $src}", +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; +defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; +} +defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">; +defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">; + +// Replicate Double FP +multiclass sse3_replicate_dfp<string OpcodeStr> { +def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; +def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), (undef))))]>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VMOVDDUP : 
sse3_replicate_dfp<"vmovddup">, VEX; +defm MOVDDUP : sse3_replicate_dfp<"movddup">; + +// Move Unaligned Integer +let isAsmParserOnly = 1 in + def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; +def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "lddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +// Several Move patterns let AddedComplexity = 5 in { def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -2646,52 +3017,98 @@ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; } -// Arithmetic -let Constraints = "$src1 = $dst" in { - def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "addsubps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, - VR128:$src2))]>; - def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "addsubps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, - (memop addr:$src2)))]>; - def ADDSUBPDrr : S3I<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "addsubpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, - VR128:$src2))]>; - def ADDSUBPDrm : S3I<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "addsubpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, - (memop addr:$src2)))]>; +// vector_shuffle v1, <undef> <1, 1, 3, 3> +let AddedComplexity = 15 in +def : Pat<(v4i32 (movshdup VR128:$src, (undef))), + (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in +def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; + +// vector_shuffle v1, <undef> <0, 0, 2, 2> +let AddedComplexity = 15 in + def : Pat<(v4i32 (movsldup VR128:$src, (undef))), + (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in + def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; + +//===---------------------------------------------------------------------===// +// SSE3 - Arithmetic +//===---------------------------------------------------------------------===// + +multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> { + def rr : I<0xD0, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (Int VR128:$src1, + VR128:$src2))]>; + def rm : I<0xD0, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (Int VR128:$src1, + (memop addr:$src2)))]>; + } -def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "lddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX], + ExeDomain = 
SSEPackedDouble in { + defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD, + VEX_4V; + defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize, + VEX_4V; +} +let Constraints = "$src1 = $dst", Predicates = [HasSSE3], + ExeDomain = SSEPackedDouble in { + defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD; + defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize; +} + +//===---------------------------------------------------------------------===// +// SSE3 Instructions +//===---------------------------------------------------------------------===// // Horizontal ops -class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>; -class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>; -class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>; -class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; + def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; +} + let Constraints = "$src1 = $dst" in { def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>; @@ -2703,35 +3120,14 @@ let Constraints = "$src1 = $dst" in { def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; } -// Thread synchronization -def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", - [(int_x86_sse3_monitor EAX, 
ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", - [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; - -// vector_shuffle v1, <undef> <1, 1, 3, 3> -let AddedComplexity = 15 in -def : Pat<(v4i32 (movshdup VR128:$src, (undef))), - (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in -def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; - -// vector_shuffle v1, <undef> <0, 0, 2, 2> -let AddedComplexity = 15 in - def : Pat<(v4i32 (movsldup VR128:$src, (undef))), - (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in - def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; - //===---------------------------------------------------------------------===// -// SSSE3 Instructions +// SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. -multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { +/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. +multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, PatFrag mem_frag128, + Intrinsic IntId64, Intrinsic IntId128> { def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))]>; @@ -2739,7 +3135,7 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, - (IntId64 (bitconvert (memopv8i8 addr:$src))))]>; + (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2752,240 +3148,203 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (memopv16i8 addr:$src))))]>, OpSize; + (bitconvert (mem_frag128 addr:$src))))]>, OpSize; } -/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16. 
-multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))]>; - - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins i64mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, - (IntId64 - (bitconvert (memopv4i16 addr:$src))))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8, + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>, VEX; + defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16, + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>, VEX; + defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32, + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>, VEX; +} - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (IntId128 VR128:$src))]>, - OpSize; +defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8, + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>; +defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16, + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>; +defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32, + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>; - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, - (IntId128 - (bitconvert (memopv8i16 addr:$src))))]>, OpSize; -} +//===---------------------------------------------------------------------===// +// SSSE3 - Packed Binary Operator Instructions +//===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32. -multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { +/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
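+/// With Is2Addr = 1 the asm string keeps the tied two-address form, e.g.
+///   phaddw {$src2, $dst|$dst, $src2}
+/// while Is2Addr = 0, used by the AVX forms below, spells out all three
+/// operands:
+///   vphaddw {$src2, $src1, $dst|$dst, $src1, $src2}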
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, PatFrag mem_frag128, + Intrinsic IntId64, Intrinsic IntId128, + bit Is2Addr = 1> { + let isCommutable = 1 in def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))]>; - + (ins VR64:$src1, VR64:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins i64mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, - (IntId64 - (bitconvert (memopv2i32 addr:$src))))]>; + (ins VR64:$src1, i64mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR64:$dst, + (IntId64 VR64:$src1, + (bitconvert (memopv8i8 addr:$src2))))]>; + let isCommutable = 1 in def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (IntId128 VR128:$src))]>, - OpSize; - + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + OpSize; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, - (IntId128 - (bitconvert (memopv4i32 addr:$src))))]>, OpSize; + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb", - int_x86_ssse3_pabs_b, - int_x86_ssse3_pabs_b_128>; -defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw", - int_x86_ssse3_pabs_w, - int_x86_ssse3_pabs_w_128>; -defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd", - int_x86_ssse3_pabs_d, - int_x86_ssse3_pabs_d_128>; - -/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8. 
-let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> { - let isCommutable = Commutable; - } - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv8i8 addr:$src2))))]>; - - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let isCommutable = 0 in { + defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128, 0>, VEX_4V; + defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32, + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128, 0>, VEX_4V; + defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128, 0>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32, + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128, 0>, VEX_4V; + defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; + defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8, + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8, + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8, + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128, 0>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16, + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128, 0>, VEX_4V; + defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32, + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128, 0>, VEX_4V; } - -/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16. 
-let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> { - let isCommutable = Commutable; - } - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv4i16 addr:$src2))))]>; - - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv8i16 addr:$src2))))]>, OpSize; - } +defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16, + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } -/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32. -let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> { - let isCommutable = Commutable; - } - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv2i32 addr:$src2))))]>; - - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv4i32 addr:$src2))))]>, OpSize; - } +// None of these have i8 immediate fields. 
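+// (ImmT = NoImm below overrides the format's immediate tag so the encoder
+// treats these opcodes as carrying no trailing immediate byte.)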
+let ImmT = NoImm, Constraints = "$src1 = $dst" in { +let isCommutable = 0 in { + defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128>; + defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32, + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128>; + defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128>; + defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128>; + defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32, + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128>; + defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128>; + defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8, + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128>; + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8, + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128>; + defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8, + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128>; + defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16, + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128>; + defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32, + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128>; +} +defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16, + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128>; } -let ImmT = NoImm in { // None of these have i8 immediate fields. -defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", - int_x86_ssse3_phadd_w, - int_x86_ssse3_phadd_w_128>; -defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd", - int_x86_ssse3_phadd_d, - int_x86_ssse3_phadd_d_128>; -defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw", - int_x86_ssse3_phadd_sw, - int_x86_ssse3_phadd_sw_128>; -defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw", - int_x86_ssse3_phsub_w, - int_x86_ssse3_phsub_w_128>; -defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd", - int_x86_ssse3_phsub_d, - int_x86_ssse3_phsub_d_128>; -defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw", - int_x86_ssse3_phsub_sw, - int_x86_ssse3_phsub_sw_128>; -defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", - int_x86_ssse3_pmadd_ub_sw, - int_x86_ssse3_pmadd_ub_sw_128>; -defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", - int_x86_ssse3_pmul_hr_sw, - int_x86_ssse3_pmul_hr_sw_128, 1>; - -defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", - int_x86_ssse3_pshuf_b, - int_x86_ssse3_pshuf_b_128>; -defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb", - int_x86_ssse3_psign_b, - int_x86_ssse3_psign_b_128>; -defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", - int_x86_ssse3_psign_w, - int_x86_ssse3_psign_w_128>; -defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", - int_x86_ssse3_psign_d, - int_x86_ssse3_psign_d_128>; -} - -// palignr patterns. 
-let Constraints = "$src1 = $dst" in { - def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; - def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; - - def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, OpSize; - def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, OpSize; +def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; +def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; + +//===---------------------------------------------------------------------===// +// SSSE3 - Packed Align Instruction Patterns +//===---------------------------------------------------------------------===// + +multiclass sse3_palign<string asm, bit Is2Addr = 1> { + def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>; + def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>; + + def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; + def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PALIGN : sse3_palign<"palignr">; + let AddedComplexity = 5 in { def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), @@ -2996,10 +3355,6 @@ def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), (PALIGNR64rr VR64:$src2, VR64:$src1, (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), (PALIGNR64rr VR64:$src2, VR64:$src1, (SHUFFLE_get_palign_imm VR64:$src3))>, @@ -3027,10 +3382,15 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), Requires<[HasSSSE3]>; } -def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; -def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; +//===---------------------------------------------------------------------===// +// SSSE3 Misc Instructions 
+//===---------------------------------------------------------------------===// + +// Thread synchronization +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", + [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; //===---------------------------------------------------------------------===// // Non-Instruction Patterns @@ -3311,287 +3671,9 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>; //===----------------------------------------------------------------------===// -// SSE4.1 Instructions +// SSE4.1 - Packed Move with Sign/Zero Extend //===----------------------------------------------------------------------===// -multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, - string OpcodeStr, - Intrinsic V4F32Int, - Intrinsic V2F64Int> { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8<opcps, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>, - OpSize; - - // Vector intrinsic operation, mem - def PSm_Int : Ii8<opcps, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>, - TA, OpSize, - Requires<[HasSSE41]>; - - // Vector intrinsic operation, reg - def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>, - OpSize; - - // Vector intrinsic operation, mem - def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>, - OpSize; -} - -let Constraints = "$src1 = $dst" in { -multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, - string OpcodeStr, - Intrinsic F32Int, - Intrinsic F64Int> { - // Intrinsic operation, reg. - def SSr_Int : SS4AIi8<opcss, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; - - // Intrinsic operation, mem. - def SSm_Int : SS4AIi8<opcss, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, - OpSize; - - // Intrinsic operation, reg. - def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; - - // Intrinsic operation, mem. 
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, - OpSize; -} -} - -// FP round - roundss, roundps, roundsd, roundpd -defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", - int_x86_sse41_round_ps, int_x86_sse41_round_pd>; -defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", - int_x86_sse41_round_ss, int_x86_sse41_round_sd>; - -// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. -multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, - Intrinsic IntId128> { - def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize; - def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, - (IntId128 - (bitconvert (memopv8i16 addr:$src))))]>, OpSize; -} - -defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", - int_x86_sse41_phminposuw>; - -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } -} - -defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", - int_x86_sse41_pcmpeqq, 1>; -defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", - int_x86_sse41_packusdw, 0>; -defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", - int_x86_sse41_pminsb, 1>; -defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", - int_x86_sse41_pminsd, 1>; -defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", - int_x86_sse41_pminud, 1>; -defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", - int_x86_sse41_pminuw, 1>; -defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", - int_x86_sse41_pmaxsb, 1>; -defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", - int_x86_sse41_pmaxsd, 1>; -defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", - int_x86_sse41_pmaxud, 1>; -defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", - int_x86_sse41_pmaxuw, 1>; - -defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>; - -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, addr:$src2)>; - -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT, - SDNode OpNode, Intrinsic IntId128, - bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode (OpVT VR128:$src1), - VR128:$src2))]>, OpSize { - let isCommutable = Commutable; - } - def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, 
VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize; - def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, (memop addr:$src2)))]>, - OpSize; - } -} - -/// SS48I_binop_rm - Simple SSE41 binary operator. -let Constraints = "$src1 = $dst" in { -multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>, - OpSize; -} -} - -defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>; - -/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize { - let isCommutable = Commutable; - } - def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, - OpSize; - } -} - -defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", - int_x86_sse41_blendps, 0>; -defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", - int_x86_sse41_blendpd, 0>; -defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", - int_x86_sse41_pblendw, 0>; -defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", - int_x86_sse41_dpps, 1>; -defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", - int_x86_sse41_dppd, 1>; -defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", - int_x86_sse41_mpsadbw, 0>; - - -/// SS41I_ternary_int - SSE 4.1 ternary operator -let Uses = [XMM0], Constraints = "$src1 = $dst" in { - multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { - def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, - "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>, - OpSize; - - def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, - "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"), - [(set VR128:$dst, - (IntId VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize; - } -} - -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm 
PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; - - multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> { def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -3604,6 +3686,21 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, + VEX; +defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, + VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>, + VEX; +defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, + VEX; +defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, + VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, + VEX; +} + defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; @@ -3655,6 +3752,17 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, + VEX; +defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, + VEX; +defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>, + VEX; +defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, + VEX; +} + defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; @@ -3685,6 +3793,12 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, + VEX; +defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, + VEX; +} defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; @@ -3699,6 +3813,9 @@ def : Pat<(int_x86_sse41_pmovzxbq (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Extract Instructions +//===----------------------------------------------------------------------===// /// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { @@ -3718,6 +3835,9 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; + defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -3733,6 +3853,9 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRW : SS41I_extract16<0x15, 
"vpextrw">, VEX; + defm PEXTRW : SS41I_extract16<0x15, "pextrw">; @@ -3752,8 +3875,31 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; + defm PEXTRD : SS41I_extract32<0x16, "pextrd">; +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> { + def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set GR64:$dst, + (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W; + def mr : SS4AIi8<opc, MRMDestMem, (outs), + (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(store (extractelt (v2i64 VR128:$src1), imm:$src2), + addr:$dst)]>, OpSize, REX_W; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; + +defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory /// destination @@ -3773,6 +3919,8 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -3782,78 +3930,530 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, Requires<[HasSSE41]>; -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), - imm:$src3))]>, OpSize; - } +//===----------------------------------------------------------------------===// +// SSE4.1 - Insert Instructions +//===----------------------------------------------------------------------===// + +multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize; + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), + imm:$src3))]>, OpSize; } -defm PINSRB : SS41I_insert8<0x20, "pinsrb">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; +let Constraints = "$src1 = $dst" 
in + defm PINSRB : SS41I_insert8<0x20, "pinsrb">; + +multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, + OpSize; + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), + imm:$src3)))]>, OpSize; +} -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, - OpSize; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), - imm:$src3)))]>, OpSize; - } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRD : SS41I_insert32<0x22, "pinsrd">; + +multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, + OpSize; + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), + imm:$src3)))]>, OpSize; } -defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; +let Constraints = "$src1 = $dst" in + defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; // insertps has a few different modes, there's the first two here below which // are optimized inserts that won't zero arbitrary elements in the destination // vector. The next one matches the intrinsic and could zero arbitrary elements // in the target vector. 
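A side note on the insertps modes just described: all of them are selected through the imm8 operand. The following is a minimal standalone C++ sketch, not part of this patch, of how that immediate is assembled per the Intel SDM layout (the helper name is hypothetical): bits [7:6] pick the source element (count_s), bits [5:4] the destination element (count_d), and bits [3:0] form the zero mask (zmask).

#include <cstdint>
#include <cstdio>

// Hypothetical helper (not part of the .td file): build the INSERTPS imm8.
// Bits [7:6] = count_s (source element), bits [5:4] = count_d (destination
// element), bits [3:0] = zmask (destination elements forced to zero).
static uint8_t insertpsImm(unsigned CountS, unsigned CountD, unsigned ZMask) {
  return uint8_t(((CountS & 3) << 6) | ((CountD & 3) << 4) | (ZMask & 0xf));
}

int main() {
  // Take element 2 of the source, write it to element 1, zero element 3.
  std::printf("0x%02x\n", insertpsImm(2, 1, 0x8)); // prints 0x98
}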
-let Constraints = "$src1 = $dst" in { - multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, +multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, - (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, OpSize; - } + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86insrtps VR128:$src1, + (v4f32 (scalar_to_vector (loadf32 addr:$src2))), + imm:$src3))]>, OpSize; } -defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Round Instructions +//===----------------------------------------------------------------------===// + +multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, + string OpcodeStr, + Intrinsic V4F32Int, + Intrinsic V2F64Int> { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr_Int : SS4AIi8<opcps, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>, + OpSize; + + // Vector intrinsic operation, mem + def PSm_Int : Ii8<opcps, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>, + TA, OpSize, + Requires<[HasSSE41]>; + + // Vector intrinsic operation, reg + def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>, + OpSize; + + // Vector intrinsic operation, mem + def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>, + OpSize; +} + +multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd, + string OpcodeStr> { + // Intrinsic operation, reg. 
+ // Vector intrinsic operation, reg + def PSr : SS4AIi8<opcps, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, OpSize; + + // Vector intrinsic operation, mem + def PSm : Ii8<opcps, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, TA, OpSize, Requires<[HasSSE41]>; + + // Vector intrinsic operation, reg + def PDr : SS4AIi8<opcpd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, OpSize; + + // Vector intrinsic operation, mem + def PDm : SS4AIi8<opcpd, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, OpSize; +} + +multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int, bit Is2Addr = 1> { + // Intrinsic operation, reg. + def SSr_Int : SS4AIi8<opcss, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; + + // Intrinsic operation, mem. + def SSm_Int : SS4AIi8<opcss, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, + OpSize; + + // Intrinsic operation, reg. + def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; + + // Intrinsic operation, mem. + def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, + OpSize; +} + +multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd, + string OpcodeStr> { + // Intrinsic operation, reg. + def SSr : SS4AIi8<opcss, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + + // Intrinsic operation, mem. + def SSm : SS4AIi8<opcss, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + + // Intrinsic operation, reg. + def SDr : SS4AIi8<opcsd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + + // Intrinsic operation, mem. 
+ def SDm : SS4AIi8<opcsd, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; +} + +// FP round - roundss, roundps, roundsd, roundpd +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // Intrinsic form + defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>, + VEX; + defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", + int_x86_sse41_round_ss, int_x86_sse41_round_sd, + 0>, VEX_4V; + // Instructions for the assembler + defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; + defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; +} + +defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>; +let Constraints = "$src1 = $dst" in +defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", + int_x86_sse41_round_ss, int_x86_sse41_round_sd>; + +//===----------------------------------------------------------------------===// +// SSE4.1 - Misc Instructions +//===----------------------------------------------------------------------===// + +// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. +multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, + Intrinsic IntId128> { + def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize; + def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, + (IntId128 + (bitconvert (memopv8i16 addr:$src))))]>, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in +defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", + int_x86_sse41_phminposuw>, VEX; +defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", + int_x86_sse41_phminposuw>; + +/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize; + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, + 0>, VEX_4V; + defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq, + 0>, VEX_4V; + defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, + 0>, VEX_4V; + defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, + 0>, VEX_4V; + defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, + 0>, VEX_4V; + defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, + 0>, VEX_4V; + defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, + 0>, VEX_4V; + defm VPMAXSD : 
SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, + 0>, VEX_4V; + defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, + 0>, VEX_4V; + defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, + 0>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, + 0>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; + defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; + defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; + defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; + defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; + defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; + defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; + defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; + defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; +} + +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (PCMPEQQrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (PCMPEQQrm VR128:$src1, addr:$src2)>; + +/// SS48I_binop_rm - Simple SSE41 binary operator. +multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, + OpSize; + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpNode VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; + +/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate +multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; + def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let isCommutable = 0 in { + defm VBLENDPS : 
SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+                                      0>, VEX_4V;
+  defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+                                      0>, VEX_4V;
+  defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+                                      0>, VEX_4V;
+  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+                                      0>, VEX_4V;
+  }
+  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+                                   0>, VEX_4V;
+  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+                                   0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+  let isCommutable = 0 in {
+  defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
+  defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
+  defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
+  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
+  }
+  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
+  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
+}
+
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+  multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
+    def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
+               (ins VR128:$src1, VR128:$src2, VR128:$src3),
+               !strconcat(OpcodeStr,
+                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+               [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+    def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
+               (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+               !strconcat(OpcodeStr,
+                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+               [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+  }
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+                    (ins VR128:$src1, VR128:$src2),
+                    !strconcat(OpcodeStr,
+                     "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+                    OpSize;
+
+    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+                    (ins VR128:$src1, i128mem:$src2),
+                    !strconcat(OpcodeStr,
+                     "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+                    [(set VR128:$dst,
+                      (IntId VR128:$src1,
+                       (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+  }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the intel intrinsic that corresponds to this.
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize, VEX; +} + let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest \t{$src2, $src1|$src1, $src2}", @@ -3865,43 +4465,207 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in +def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize, VEX; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, OpSize; - //===----------------------------------------------------------------------===// -// SSE4.2 Instructions +// SSE4.2 - Compare Instructions //===----------------------------------------------------------------------===// /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } +multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + OpSize; + def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, + 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), (PCMPGTQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; +//===----------------------------------------------------------------------===// +// SSE4.2 - String/text Processing Instructions +//===----------------------------------------------------------------------===// + +// 
Packed Compare Implicit Length Strings, Return Mask +let Defs = [EFLAGS], usesCustomInserter = 1 in { + def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; + def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 + VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, + Predicates = [HasAVX] in { + def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS] in { + def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; +} + +// Packed Compare Explicit Length Strings, Return Mask +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; + + def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX], + Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; +} + +// Packed Compare Implicit Length Strings, Return Index +let Defs = [ECX, EFLAGS] in { + multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), 
imm:$src3)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">, + VEX; +defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">, + VEX; +defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">, + VEX; +defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">, + VEX; +defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">, + VEX; +defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">, + VEX; +} + +defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; +defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; +defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; +defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; +defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; +defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; + +// Packed Compare Explicit Length Strings, Return Index +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { + multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, + VEX; +defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, + VEX; +defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">, + VEX; +defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">, + VEX; +defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">, + VEX; +defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">, + VEX; +} + +defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; +defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; +defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; +defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; +defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; +defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; + +//===----------------------------------------------------------------------===// +// SSE4.2 - CRC Instructions +//===----------------------------------------------------------------------===// + +// No CRC instructions have AVX equivalents + // crc intrinsic instruction // This set of instructions are only rm, the only difference is the size // of r and m. @@ -3969,133 +4733,52 @@ let Constraints = "$src1 = $dst" in { REX_W; } -// String/text processing instructions. 
-let Defs = [EFLAGS], usesCustomInserter = 1 in { -def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "#PCMPISTRM128rr PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, - imm:$src3))]>, OpSize; -def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "#PCMPISTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2), - imm:$src3))]>, OpSize; -} - -let Defs = [XMM0, EFLAGS] in { -def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -} +//===----------------------------------------------------------------------===// +// AES-NI Instructions +//===----------------------------------------------------------------------===// -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { -def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; - -def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, - OpSize; +multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + OpSize; + def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { -def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; -def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; +// Perform One Round of an AES Encryption/Decryption Flow +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", + int_x86_aesni_aesenc, 0>, VEX_4V; + defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", + int_x86_aesni_aesenclast, 0>, VEX_4V; + defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", + int_x86_aesni_aesdec, 0>, VEX_4V; + defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", + int_x86_aesni_aesdeclast, 0>, VEX_4V; } -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri<Intrinsic IntId128> { - def rr : SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 
VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; - } -} - -defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; -defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; -defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; -defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; -defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; -defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; - -let Defs = [ECX, EFLAGS] in { -let Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri<Intrinsic IntId128> { - def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - } -} -} - -defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; -defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; -defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; -defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; -defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; -defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; - -//===----------------------------------------------------------------------===// -// AES-NI Instructions -//===----------------------------------------------------------------------===// - let Constraints = "$src1 = $dst" in { - multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } + defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", + int_x86_aesni_aesenc>; + defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", + int_x86_aesni_aesenclast>; + defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", + int_x86_aesni_aesdec>; + defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", + int_x86_aesni_aesdeclast>; } -defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", - int_x86_aesni_aesenc>; -defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", - int_x86_aesni_aesenclast>; -defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", - int_x86_aesni_aesdec>; -defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", - int_x86_aesni_aesdeclast>; - def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), (AESENCrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), @@ -4113,13 +4796,27 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (int_x86_aesni_aesdeclast 
VR128:$src1, (memop addr:$src2))), (AESDECLASTrm VR128:$src1, addr:$src2)>; +// Perform the AES InvMixColumn Transformation +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1), + "vaesimc\t{$src1, $dst|$dst, $src1}", + [(set VR128:$dst, + (int_x86_aesni_aesimc VR128:$src1))]>, + OpSize, VEX; + def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1), + "vaesimc\t{$src1, $dst|$dst, $src1}", + [(set VR128:$dst, + (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, + OpSize, VEX; +} def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc VR128:$src1))]>, OpSize; - def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", @@ -4127,6 +4824,22 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, OpSize; +// AES Round Key Generation Assist +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + OpSize, VEX; + def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1, i8imm:$src2), + "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), + imm:$src2))]>, + OpSize, VEX; +} def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index a9681e6..633ddd4 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -30,7 +30,7 @@ class X86MCCodeEmitter : public MCCodeEmitter { MCContext &Ctx; bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { Is64BitMode = is64Bit; } @@ -38,17 +38,18 @@ public: ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 4; + return 5; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[] = { { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel }, { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } }; - + if (Kind < FirstTargetFixupKind) return MCCodeEmitter::getFixupKindInfo(Kind); @@ -56,16 +57,38 @@ public: "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } - + static unsigned GetX86RegNum(const MCOperand &MO) { return X86RegisterInfo::getX86RegNum(MO.getReg()); } - + + // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range + // 0-7 and the difference between the 2 groups is given by the REX prefix. 
+  // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+  // in 1's complement form, example:
+  //
+  //    ModRM field => XMM9 => 1
+  //    VEX.VVVV    => XMM9 => ~9
+  //
+  // See table 4-35 of Intel AVX Programming Reference for details.
+  static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+                                              unsigned OpNum) {
+    unsigned SrcReg = MI.getOperand(OpNum).getReg();
+    unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+    if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
+        (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
+      SrcRegNum += 8;
+
+    // The registers represented through VEX_VVVV should
+    // be encoded in 1's complement form.
+    return (~SrcRegNum) & 0xf;
+  }
+
  void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
    OS << (char)C;
    ++CurByte;
  }
-
+
  void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
                    raw_ostream &OS) const {
    // Output the constant in little endian byte order.
@@ -75,38 +98,49 @@ public:
    }
  }

-  void EmitImmediate(const MCOperand &Disp,
+  void EmitImmediate(const MCOperand &Disp,
                     unsigned ImmSize, MCFixupKind FixupKind,
                     unsigned &CurByte, raw_ostream &OS,
                     SmallVectorImpl<MCFixup> &Fixups,
                     int ImmOffset = 0) const;
-
+
  inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
                                        unsigned RM) {
    assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
    return RM | (RegOpcode << 3) | (Mod << 6);
  }
-
+
  void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
                        unsigned &CurByte, raw_ostream &OS) const {
    EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
  }
-
+
  void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
                   unsigned &CurByte, raw_ostream &OS) const {
    // SIB byte is in the same format as the ModRMByte.
    EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
  }
-
-
+
+
  void EmitMemModRMByte(const MCInst &MI, unsigned Op,
-                       unsigned RegOpcodeField,
-                       unsigned TSFlags, unsigned &CurByte, raw_ostream &OS,
+                       unsigned RegOpcodeField,
+                       uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
                        SmallVectorImpl<MCFixup> &Fixups) const;
-
+
  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups) const;
-
+
+  void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+                           const MCInst &MI, const TargetInstrDesc &Desc,
+                           raw_ostream &OS) const;
+
+  void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+                                 int MemOperand, const MCInst &MI,
+                                 raw_ostream &OS) const;
+
+  void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+                        const MCInst &MI, const TargetInstrDesc &Desc,
+                        raw_ostream &OS) const;
};

} // end anonymous namespace
@@ -124,24 +158,23 @@ MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
  return new X86MCCodeEmitter(TM, Ctx, true);
}
-
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit
-/// sign-extended field.
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
static bool isDisp8(int Value) {
  return Value == (signed char)Value;
}

/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
/// in an instruction with the specified TSFlags.
-static MCFixupKind getImmFixupKind(unsigned TSFlags) {
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
  unsigned Size = X86II::getSizeOfImm(TSFlags);
  bool isPCRel = X86II::isImmPCRel(TSFlags);
-
+
  switch (Size) {
  default: assert(0 && "Unknown immediate size");
  case 1: return isPCRel ?
MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2; case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; - case 2: assert(!isPCRel); return FK_Data_2; case 8: assert(!isPCRel); return FK_Data_8; } } @@ -162,29 +195,30 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If we have an immoffset, add it to the expression. const MCExpr *Expr = DispOp.getExpr(); - + // If the fixup is pc-relative, we need to bias the value to be relative to // the start of the field, not the end of the field. if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte)) + ImmOffset -= 2; if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) ImmOffset -= 1; - + if (ImmOffset) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), Ctx); - + // Emit a symbolic constant as a fixup and 4 zeros. Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); EmitConstant(0, Size, CurByte, OS); } - void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - unsigned TSFlags, unsigned &CurByte, + uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ const MCOperand &Disp = MI.getOperand(Op+3); @@ -192,43 +226,43 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, const MCOperand &Scale = MI.getOperand(Op+1); const MCOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - + // Handle %rip relative addressing. if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode - assert(IndexReg.getReg() == 0 && Is64BitMode && - "Invalid rip-relative address"); + assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode"); + assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - + unsigned FixupKind = X86::reloc_riprel_4byte; - + // movq loads are handled with a special relocation form which allows the // linker to eliminate some loads for GOT references which end up in the // same linkage unit. if (MI.getOpcode() == X86::MOV64rm || MI.getOpcode() == X86::MOV64rm_TC) FixupKind = X86::reloc_riprel_4byte_movq_load; - + // rip-relative addressing is actually relative to the *next* instruction. // Since an immediate can follow the mod/rm byte for an instruction, this // means that we need to bias the immediate field of the instruction with // the size of the immediate field. If we have this case, add it into the // expression to emit. int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; - + EmitImmediate(Disp, 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); return; } - + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; - + // Determine whether a SIB byte is needed. - // If no BaseReg, issue a RIP relative instruction only if the MCE can + // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. if (// The SIB byte must be used if there is an index register. - IndexReg.getReg() == 0 && + IndexReg.getReg() == 0 && // The SIB byte must be used if the base is ESP/RSP/R12, all of which // encode to an R/M value of 4, which indicates that a SIB byte is // present. 
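
(An aside for readers tracing EmitMemModRMByte, which continues in the next hunk: the mod field of the ModRM byte selects the displacement size. The sketch below is illustrative only and not part of the patch; the helper names are invented, and it assumes a base register without the special-case encodings, such as EBP/ESP, that the patch handles separately. It simply mirrors the patch's own ModRMByte packing and isDisp8 test.)

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Pack an x86 ModRM byte: mod in bits 7-6, reg/opcode in bits 5-3, r/m in 2-0.
static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
  assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM fields out of range");
  return (uint8_t)(RM | (RegOpcode << 3) | (Mod << 6));
}

// Same test the emitter uses: does the value survive sign-extension from 8 bits?
static bool fitsDisp8(int Value) { return Value == (signed char)Value; }

int main() {
  // mod=0: [reg] with no displacement; mod=1: [reg+disp8]; mod=2: [reg+disp32].
  for (int Disp : {0, 0x12, 0x12345}) {
    unsigned Mod = (Disp == 0) ? 0 : (fitsDisp8(Disp) ? 1 : 2);
    printf("disp=0x%x -> ModRM=0x%02x, trailing disp bytes=%d\n", Disp,
           modRMByte(Mod, /*RegOpcode=*/0, /*RM=EAX*/0),
           Mod == 0 ? 0 : (Mod == 1 ? 1 : 4));
  }
  return 0;
}
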
@@ -242,7 +276,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - + // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -251,24 +285,24 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); return; } - + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm() && isDisp8(Disp.getImm())) { EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); return; } - + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - + // We need a SIB byte, so start by outputting the ModR/M byte first assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); - + bool ForceDisp32 = false; bool ForceDisp8 = false; if (BaseReg == 0) { @@ -294,13 +328,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Emit the normal disp32 encoding. EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); } - + // Calculate what the SS field value should be... static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; unsigned SS = SSTable[Scale.getImm()]; - + if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base, see Intel + // Handle the SIB byte for the case where there is no base, see Intel // Manual 2A, table 2-7. The displacement has already been output. unsigned IndexRegNo; if (IndexReg.getReg()) @@ -316,7 +350,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, IndexRegNo = 4; // For example [ESP+1*<noreg>+4] EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS); } - + // Do we need to output a displacement? if (ForceDisp8) EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); @@ -324,26 +358,216 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); } +/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix +/// called VEX. +void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + int MemOperand, const MCInst &MI, + const TargetInstrDesc &Desc, + raw_ostream &OS) const { + bool HasVEX_4V = false; + if (TSFlags & X86II::VEX_4V) + HasVEX_4V = true; + + // VEX_R: opcode extension equivalent to REX.R in + // 1's complement (inverted) form + // + // 1: Same as REX_R=0 (must be 1 in 32-bit mode) + // 0: Same as REX_R=1 (64 bit mode only) + // + unsigned char VEX_R = 0x1; + + // VEX_X: equivalent to REX.X, only used when a + // register is used as the index in the SIB byte.
+ // + // 1: Same as REX.X=0 (must be 1 in 32-bit mode) + // 0: Same as REX.X=1 (64-bit mode only) + unsigned char VEX_X = 0x1; + + // VEX_B: + // + // 1: Same as REX_B=0 (ignored in 32-bit mode) + // 0: Same as REX_B=1 (64 bit mode only) + // + unsigned char VEX_B = 0x1; + + // VEX_W: opcode specific (use like REX.W, or used for + // opcode extension, or ignored, depending on the opcode byte) + unsigned char VEX_W = 0; + + // VEX_5M (VEX m-mmmmm field): + // + // 0b00000: Reserved for future use + // 0b00001: implied 0F leading opcode + // 0b00010: implied 0F 38 leading opcode bytes + // 0b00011: implied 0F 3A leading opcode bytes + // 0b00100-0b11111: Reserved for future use + // + unsigned char VEX_5M = 0x1; + + // VEX_4V (VEX vvvv field): a register specifier + // (in 1's complement form) or 1111 if unused. + unsigned char VEX_4V = 0xf; + + // VEX_L (Vector Length): + // + // 0: scalar or 128-bit vector + // 1: 256-bit vector + // + unsigned char VEX_L = 0; + + // VEX_PP: opcode extension providing equivalent + // functionality of a SIMD prefix + // + // 0b00: None + // 0b01: 66 + // 0b10: F3 + // 0b11: F2 + // + unsigned char VEX_PP = 0; + + // Encode the operand size opcode prefix as needed. + if (TSFlags & X86II::OpSize) + VEX_PP = 0x01; + + if (TSFlags & X86II::VEX_W) + VEX_W = 1; + + switch (TSFlags & X86II::Op0Mask) { + default: assert(0 && "Invalid prefix!"); + case X86II::T8: // 0F 38 + VEX_5M = 0x2; + break; + case X86II::TA: // 0F 3A + VEX_5M = 0x3; + break; + case X86II::TF: // F2 0F 38 + VEX_PP = 0x3; + VEX_5M = 0x2; + break; + case X86II::XS: // F3 0F + VEX_PP = 0x2; + break; + case X86II::XD: // F2 0F + VEX_PP = 0x3; + break; + case X86II::TB: // Bypass: Not used by VEX + case 0: + break; // No prefix! + } + + // Set the vector length to 256-bit if YMM0-YMM15 is used + for (unsigned i = 0; i != MI.getNumOperands(); ++i) { + if (!MI.getOperand(i).isReg()) + continue; + unsigned SrcReg = MI.getOperand(i).getReg(); + if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) + VEX_L = 1; + } + + unsigned NumOps = MI.getNumOperands(); + unsigned CurOp = 0; + + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: + NumOps = CurOp = X86::AddrNumOperands; + case X86II::MRMSrcMem: + case X86II::MRMSrcReg: + if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + + // CurOp and NumOps are equal when VEX_R represents a register used + // to index a memory destination (which is the last operand) + CurOp = (CurOp == NumOps) ? 
0 : CurOp+1; + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + } + + // If the last register should be encoded in the immediate field, + // do not use any bits of the VEX prefix for this register; ignore it. + if (TSFlags & X86II::VEX_I8IMM) + NumOps--; + + for (; CurOp != NumOps; ++CurOp) { + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_B = 0x0; + if (!VEX_B && MO.isReg() && + ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) && + X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_X = 0x0; + } + break; + default: // MRMDestReg, MRM0r-MRM7r + if (MI.getOperand(CurOp).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + + CurOp++; + for (; CurOp != NumOps; ++CurOp) { + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && !HasVEX_4V && + X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + } + break; + assert(0 && "Not implemented!"); + } + + // Emit segment override opcode prefix as needed. + EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + + // VEX opcode prefix can have 2 or 3 bytes + // + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + // + unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); + + if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix + EmitByte(0xC5, CurByte, OS); + EmitByte(LastByte | (VEX_R << 7), CurByte, OS); + return; + } + + // 3 byte VEX prefix + EmitByte(0xC4, CurByte, OS); + EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); + EmitByte(LastByte | (VEX_W << 7), CurByte, OS); +} + /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand /// size, and 3) use of X86-64 extended registers. -static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, +static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const TargetInstrDesc &Desc) { - // Pseudo instructions never have a rex byte. - if ((TSFlags & X86II::FormMask) == X86II::Pseudo) - return 0; - unsigned REX = 0; if (TSFlags & X86II::REX_W) - REX |= 1 << 3; - + REX |= 1 << 3; // set REX.W + if (MI.getNumOperands() == 0) return REX; - + unsigned NumOps = MI.getNumOperands(); // FIXME: MCInst should explicitize the two-addrness. bool isTwoAddr = NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; - + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; for (; i != NumOps; ++i) { @@ -353,34 +577,34 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything // that returns non-zero. - REX |= 0x40; + REX |= 0x40; // REX fixed encoding prefix break; } - + switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R i = isTwoAddr ?
2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << 0; + REX |= 1 << 0; // set REX.B } break; case X86II::MRMSrcMem: { if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R unsigned Bit = 0; i = isTwoAddr ? 2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << Bit; + REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1) Bit++; } } @@ -391,17 +615,17 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: case X86II::MRMDestMem: { - unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); i = isTwoAddr ? 1 : 0; if (NumOps > e && MI.getOperand(e).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R unsigned Bit = 0; for (; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << Bit; + REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1) Bit++; } } @@ -410,39 +634,40 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, default: if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 0; + REX |= 1 << 0; // set REX.B i = isTwoAddr ? 2 : 1; for (unsigned e = NumOps; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R } break; } return REX; } -void X86MCCodeEmitter:: -EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); - unsigned TSFlags = Desc.TSFlags; - - // Keep track of the current byte being emitted. - unsigned CurByte = 0; - - // FIXME: We should emit the prefixes in exactly the same order as GAS does, - // in order to provide diffability. - - // Emit the lock opcode prefix as needed. - if (TSFlags & X86II::LOCK) - EmitByte(0xF0, CurByte, OS); - - // Emit segment override opcode prefix as needed. +/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed +void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, + unsigned &CurByte, int MemOperand, + const MCInst &MI, + raw_ostream &OS) const { switch (TSFlags & X86II::SegOvrMask) { default: assert(0 && "Invalid segment!"); - case 0: break; // No segment override! + case 0: + // No segment override, check for explicit one on memory operand. + if (MemOperand != -1) { // If the instruction has a memory operand. 
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { + default: assert(0 && "Unknown segment register!"); + case 0: break; + case X86::CS: EmitByte(0x2E, CurByte, OS); break; + case X86::SS: EmitByte(0x36, CurByte, OS); break; + case X86::DS: EmitByte(0x3E, CurByte, OS); break; + case X86::ES: EmitByte(0x26, CurByte, OS); break; + case X86::FS: EmitByte(0x64, CurByte, OS); break; + case X86::GS: EmitByte(0x65, CurByte, OS); break; + } + } + break; case X86II::FS: EmitByte(0x64, CurByte, OS); break; @@ -450,19 +675,36 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0x65, CurByte, OS); break; } - +} + +/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode. +/// +/// MemOperand is the operand # of the start of a memory operand if present. If +/// not present, it is -1. +void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + int MemOperand, const MCInst &MI, + const TargetInstrDesc &Desc, + raw_ostream &OS) const { + + // Emit the lock opcode prefix as needed. + if (TSFlags & X86II::LOCK) + EmitByte(0xF0, CurByte, OS); + + // Emit segment override opcode prefix as needed. + EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + // Emit the repeat opcode prefix as needed. if ((TSFlags & X86II::Op0Mask) == X86II::REP) EmitByte(0xF3, CurByte, OS); - + // Emit the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) EmitByte(0x66, CurByte, OS); - + // Emit the address size opcode prefix as needed. if (TSFlags & X86II::AdSize) EmitByte(0x67, CurByte, OS); - + bool Need0FPrefix = false; switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); @@ -494,18 +736,18 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::DE: EmitByte(0xDE, CurByte, OS); break; case X86II::DF: EmitByte(0xDF, CurByte, OS); break; } - + // Handle REX prefix. // FIXME: Can this come before F2 etc to simplify emission? if (Is64BitMode) { if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) EmitByte(0x40 | REX, CurByte, OS); } - + // 0x0F escape code must be emitted just before the opcode. if (Need0FPrefix) EmitByte(0x0F, CurByte, OS); - + // FIXME: Pull this up into previous switch if REX can be moved earlier. switch (TSFlags & X86II::Op0Mask) { case X86II::TF: // F2 0F 38 @@ -516,8 +758,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0x3A, CurByte, OS); break; } - +} + +void X86MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; + // If this is a two-address instruction, skip one of the register operands. + // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) @@ -525,56 +780,85 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; - + + // Keep track of the current byte being emitted. + unsigned CurByte = 0; + + // Is this instruction encoded using the AVX VEX prefix? + bool HasVEXPrefix = false; + + // Does it use the VEX.VVVV field?
+ bool HasVEX_4V = false; + + if (TSFlags & X86II::VEX) + HasVEXPrefix = true; + if (TSFlags & X86II::VEX_4V) + HasVEX_4V = true; + + // Determine where the memory operand starts, if present. + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand != -1) MemoryOperand += CurOp; + + if (!HasVEXPrefix) + EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + else + EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); - case X86II::Pseudo: return; // Pseudo instructions encode to nothing. + case X86II::Pseudo: + assert(0 && "Pseudo instruction shouldn't be emitted"); case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; - + case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); break; - + case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp), GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); CurOp += 2; break; - + case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, - GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), + GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)), TSFlags, CurByte, OS, Fixups); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; break; - + case X86II::MRMSrcReg: EmitByte(BaseOpcode, CurByte, OS); - EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), - CurByte, OS); - CurOp += 2; + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + + EmitRegModRMByte(MI.getOperand(SrcRegNum), + GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); + CurOp = SrcRegNum + 1; break; - + case X86II::MRMSrcMem: { + int AddrOperands = X86::AddrNumOperands; + unsigned FirstMemOp = CurOp+1; + if (HasVEX_4V) { + ++AddrOperands; + ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). + } + EmitByte(BaseOpcode, CurByte, OS); - // FIXME: Maybe lea should have its own form? This is a horrible hack. - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; - - EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), + EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)), TSFlags, CurByte, OS, Fixups); CurOp += AddrOperands + 1; break; @@ -584,6 +868,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). 
+ CurOp++; EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp++), (TSFlags & X86II::FormMask)-X86II::MRM0r, @@ -596,7 +882,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, TSFlags, CurByte, OS, Fixups); - CurOp += X86AddrNumOperands; + CurOp += X86::AddrNumOperands; break; case X86II::MRM_C1: EmitByte(BaseOpcode, CurByte, OS); @@ -639,14 +925,27 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0xF9, CurByte, OS); break; } - + // If there is a remaining operand, it must be a trailing immediate. Emit it // according to the right size for the instruction. - if (CurOp != NumOps) - EmitImmediate(MI.getOperand(CurOp++), - X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), - CurByte, OS, Fixups); - + if (CurOp != NumOps) { + // The last source register of a 4-operand instruction in AVX is encoded + // in bits[7:4] of an immediate byte, and bits[3:0] are ignored. + if (TSFlags & X86II::VEX_I8IMM) { + const MCOperand &MO = MI.getOperand(CurOp++); + bool IsExtReg = + X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); + unsigned RegNum = (IsExtReg ? (1 << 7) : 0); + RegNum |= GetX86RegNum(MO) << 4; + EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, + Fixups); + } else + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + } + + #ifndef NDEBUG // FIXME: Verify. if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) { diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 98975ea..5f31e00 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -127,21 +127,29 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: return RegNo-X86::ST0; - case X86::XMM0: case X86::XMM8: case X86::MM0: + case X86::XMM0: case X86::XMM8: + case X86::YMM0: case X86::YMM8: case X86::MM0: return 0; - case X86::XMM1: case X86::XMM9: case X86::MM1: + case X86::XMM1: case X86::XMM9: + case X86::YMM1: case X86::YMM9: case X86::MM1: return 1; - case X86::XMM2: case X86::XMM10: case X86::MM2: + case X86::XMM2: case X86::XMM10: + case X86::YMM2: case X86::YMM10: case X86::MM2: return 2; - case X86::XMM3: case X86::XMM11: case X86::MM3: + case X86::XMM3: case X86::XMM11: + case X86::YMM3: case X86::YMM11: case X86::MM3: return 3; - case X86::XMM4: case X86::XMM12: case X86::MM4: + case X86::XMM4: case X86::XMM12: + case X86::YMM4: case X86::YMM12: case X86::MM4: return 4; - case X86::XMM5: case X86::XMM13: case X86::MM5: + case X86::XMM5: case X86::XMM13: + case X86::YMM5: case X86::YMM13: case X86::MM5: return 5; - case X86::XMM6: case X86::XMM14: case X86::MM6: + case X86::XMM6: case X86::XMM14: + case X86::YMM6: case X86::YMM14: case X86::MM6: return 6; - case X86::XMM7: case X86::XMM15: case X86::MM7: + case X86::XMM7: case X86::XMM15: + case X86::YMM7: case X86::YMM15: case X86::MM7: return 7; case X86::ES: @@ -157,6 +165,34 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::GS: return 5; + case X86::CR0: + return 0; + case X86::CR1: + return 1; + case X86::CR2: + return 2; + case X86::CR3: + return 3; + case X86::CR4: + return 4; + + case X86::DR0: + return 0; + case X86::DR1: + return 1; + case X86::DR2: + return 2; + case X86::DR3: + return 3; + case X86::DR4: + return 4; + case X86::DR5: + return 5; + case X86::DR6: + return 6; + case
X86::DR7: + return 7; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -357,56 +393,6 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } } -const TargetRegisterClass* const* -X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - bool callsEHReturn = false; - if (MF) - callsEHReturn = MF->getMMI().callsEHReturn(); - - static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, 0 - }; - - if (Is64Bit) { - if (IsWin64) - return CalleeSavedRegClassesWin64; - else - return (callsEHReturn ? - CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit); - } else { - return (callsEHReturn ? - CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit); - } -} - BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // Set the stack-pointer register and its aliases as reserved. @@ -696,8 +682,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // } // [EBP] MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta, - true, false); + (-1U*SlotSize)+TailCallReturnAddrDelta, true); } if (hasFP(MF)) { @@ -710,7 +695,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, -(int)SlotSize + TFI.getOffsetOfLocalArea() + TailCallReturnAddrDelta, - true, false); + true); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; @@ -1240,8 +1225,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, if (CSSize) { unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; MachineInstr *MI = - addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); + addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); MBB.insert(MBBI, MI); } else { BuildMI(MBB, MBBI, DL, @@ -1301,9 +1286,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, for (unsigned i = 0; i != 5; ++i) MIB.addOperand(MBBI->getOperand(i)); } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). 
+ addReg(JumpTarget.getReg(), RegState::Kill); } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index d0b82e2..d852bcd 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -105,12 +105,6 @@ public: /// callee-save registers on this target. const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred - /// register classes to spill each callee-saved register with. The order and - /// length of this list match the getCalleeSavedRegs() list. - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses and /// should be considered unavailable at all times, e.g. SP, RA. This is used by diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 91cfaa9..9f0382e 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -147,7 +147,7 @@ let Namespace = "X86" in { def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>; def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>; def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>; - + // Pseudo Floating Point registers def FP0 : Register<"fp0">; def FP1 : Register<"fp1">; @@ -155,7 +155,7 @@ let Namespace = "X86" in { def FP3 : Register<"fp3">; def FP4 : Register<"fp4">; def FP5 : Register<"fp5">; - def FP6 : Register<"fp6">; + def FP6 : Register<"fp6">; // XMM Registers, used by the various SSE instruction set extensions. // The sub_ss and sub_sd subregs are the same registers with another regclass. @@ -357,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, }]; } -def GR32 : RegisterClass<"X86", [i32], 32, +def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; @@ -412,7 +412,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an // address, but it doesn't cause trouble. -def GR64 : RegisterClass<"X86", [i64], 64, +def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), @@ -446,7 +446,7 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { } // Debug registers. -def DEBUG_REG : RegisterClass<"X86", [i32], 32, +def DEBUG_REG : RegisterClass<"X86", [i32], 32, [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> { } @@ -780,14 +780,14 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, } // Generic vector registers: VR64 and VR128. 
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; - + let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -803,11 +803,27 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, } }]; } -def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, + +def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; + + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VR256Class::iterator + VR256Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode. + else + return end(); + } + }]; } // Status flags registers. diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 09a2685..4a10be5 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -53,9 +53,12 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportLinkage()) return X86II::MO_DLLIMPORT; - // Materializable GVs (in JIT lazy compilation mode) do not require an - // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); + // Determine whether this is a reference to a definition or a declaration. + // Materializable GVs (in JIT lazy compilation mode) do not require an extra + // load from stub. + bool isDecl = GV->hasAvailableExternallyLinkage(); + if (GV->isDeclaration() && !GV->isMaterializable()) + isDecl = true; // X86-64 in PIC mode. if (isPICStyleRIPRel()) { @@ -293,12 +296,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) - , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) - , Is64Bit(is64Bit) - , TargetType(isELF) { // Default to ELF unless otherwise specified. + , TargetTriple(TT) + , Is64Bit(is64Bit) { // default to hard float ABI if (FloatABIType == FloatABI::Default) @@ -328,47 +330,40 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, HasCMov = true; } - DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - if (TT.length() > 5) { - size_t Pos; - if ((Pos = TT.find("-darwin")) != std::string::npos) { - TargetType = isDarwin; - - // Compute the darwin version number. - if (isdigit(TT[Pos+7])) - DarwinVers = atoi(&TT[Pos+7]); - else - DarwinVers = 8; // Minimum supported darwin is Tiger. 
- } else if (TT.find("linux") != std::string::npos) { - // Linux doesn't imply ELF, but we don't currently support anything else. - TargetType = isELF; - } else if (TT.find("cygwin") != std::string::npos) { - TargetType = isCygwin; - } else if (TT.find("mingw") != std::string::npos) { - TargetType = isMingw; - } else if (TT.find("win32") != std::string::npos) { - TargetType = isWindows; - } else if (TT.find("windows") != std::string::npos) { - TargetType = isWindows; - } else if (TT.find("-cl") != std::string::npos) { - TargetType = isDarwin; - DarwinVers = 9; - } - } - // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64 // bit targets. - if (TargetType == isDarwin || Is64Bit) + if (isTargetDarwin() || Is64Bit) stackAlignment = 16; if (StackAlignment) stackAlignment = StackAlignment; } + +/// IsCalleePop - Determines whether the callee is required to pop its +/// own arguments. Callee pop is necessary to support tail calls. +bool X86Subtarget::IsCalleePop(bool IsVarArg, + CallingConv::ID CallingConv) const { + if (IsVarArg) + return false; + + switch (CallingConv) { + default: + return false; + case CallingConv::X86_StdCall: + return !is64Bit(); + case CallingConv::X86_FastCall: + return !is64Bit(); + case CallingConv::X86_ThisCall: + return !is64Bit(); + case CallingConv::Fast: + return GuaranteedTailCallOpt; + case CallingConv::GHC: + return GuaranteedTailCallOpt; + } +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 646af91..486dbc4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -14,7 +14,9 @@ #ifndef X86SUBTARGET_H #define X86SUBTARGET_H +#include "llvm/ADT/Triple.h" #include "llvm/Target/TargetSubtarget.h" +#include "llvm/CallingConv.h" #include <string> namespace llvm { @@ -88,10 +90,6 @@ protected: /// operands. This may require setting a feature bit in the processor. bool HasVectorUAMem; - /// DarwinVers - Nonzero if this is a darwin platform: the numeric - /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. - unsigned char DarwinVers; // Is any darwin-x86 platform. - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -99,6 +97,9 @@ protected: /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. /// unsigned MaxInlineSizeThreshold; + + /// TargetTriple - What processor and OS we're targeting. + Triple TargetTriple; private: /// Is64Bit - True if the processor supports 64-bit instructions and @@ -106,9 +107,6 @@ private: bool Is64Bit; public: - enum { - isELF, isCygwin, isDarwin, isWindows, isMingw - } TargetType; /// This constructor initializes the data members to match that /// of the specified triple. @@ -157,24 +155,31 @@ public: bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } - bool isTargetDarwin() const { return TargetType == isDarwin; } - bool isTargetELF() const { return TargetType == isELF; } + bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + + // ELF is a reasonably sane default and the only other X86 targets we + // support are Darwin and Windows. Just use "not those". 
+ bool isTargetELF() const { + return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing(); + } + bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } - bool isTargetWindows() const { return TargetType == isWindows; } - bool isTargetMingw() const { return TargetType == isMingw; } - bool isTargetCygwin() const { return TargetType == isCygwin; } + bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } + bool isTargetMingw() const { + return TargetTriple.getOS() == Triple::MinGW32 || + TargetTriple.getOS() == Triple::MinGW64; } + bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; } bool isTargetCygMing() const { - return TargetType == isMingw || TargetType == isCygwin; + return isTargetMingw() || isTargetCygwin(); } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. bool isTargetCOFF() const { - return TargetType == isMingw || TargetType == isCygwin || - TargetType == isWindows; + return isTargetMingw() || isTargetCygwin() || isTargetWindows(); } bool isTargetWin64() const { - return Is64Bit && (TargetType == isMingw || TargetType == isWindows); + return Is64Bit && (isTargetMingw() || isTargetWindows()); } std::string getDataLayout() const { @@ -208,7 +213,10 @@ public: /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. - unsigned getDarwinVers() const { return DarwinVers; } + unsigned getDarwinVers() const { + if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber(); + return 0; + } /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel @@ -237,6 +245,9 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; + + /// IsCalleePop - Test whether a function should pop its own arguments. + bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f2c5058..df00d3f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -173,14 +173,18 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, // Install an instruction selector. PM.add(createX86ISelDag(*this, OptLevel)); - // Install a pass to insert x87 FP_REG_KILL instructions, as needed. - PM.add(createX87FPRegKillInserterPass()); + // For 32-bit, prepend instructions to set the "global base reg" for PIC. + if (!Subtarget.is64Bit()) + PM.add(createGlobalBaseRegPass()); return false; } bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + // Install a pass to insert x87 FP_REG_KILL instructions, as needed. + PM.add(createX87FPRegKillInserterPass()); + PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. 
} diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index c100c59..6656bdc 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -138,7 +138,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // FALL THROUGH case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: - case GlobalValue::LinkerPrivateLinkage: break; case GlobalValue::DLLImportLinkage: llvm_unreachable("DLLImport linkage is not supported by this target!"); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index b230572..abe7b2f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -245,7 +245,7 @@ SDValue XCoreTargetLowering:: LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32); // If it's a debug information descriptor, don't mess with it. if (DAG.isVerifiedDebugInfoDesc(Op)) return GA; @@ -269,7 +269,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const DebugLoc dl = Op.getDebugLoc(); // transform to label + getid() * size const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { // If GV is an alias then use the aliasee to determine size @@ -454,12 +454,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const if (LD->getAlignment() == 2) { int SVOffset = LD->getSrcValueOffset(); - SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain, BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, LD->isVolatile(), LD->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); - SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, + SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain, HighAddr, LD->getSrcValue(), SVOffset + 2, MVT::i16, LD->isVolatile(), LD->isNonTemporal(), 2); @@ -812,6 +812,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -826,7 +827,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -839,6 +840,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -866,7 +868,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. 
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -919,7 +921,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); @@ -1072,7 +1074,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(ObjSize, LRSaveSize + VA.getLocMemOffset(), - true, false); + true); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -1097,7 +1099,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // address for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) { // Create a stack slot - int FI = MFI->CreateFixedObject(4, offset, true, false); + int FI = MFI->CreateFixedObject(4, offset, true); if (i == FirstVAReg) { XFI->setVarArgsFrameIndex(FI); } @@ -1120,7 +1122,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // This will point to the next argument passed via stack. XFI->setVarArgsFrameIndex( MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(), - true, false)); + true)); } } @@ -1133,19 +1135,19 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, bool XCoreTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) const { + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), - RVLocs, *DAG.getContext()); - return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore); + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_XCore); } SDValue XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of @@ -1175,7 +1177,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad @@ -1221,23 +1223,22 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) - .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. 
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) + .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -1250,11 +1251,12 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII.get(XCore::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1379,7 +1381,6 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Mul0, Mul1, Addend0, Addend1; if (N->getValueType(0) == MVT::i32 && isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) { - SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(MVT::i32, MVT::i32), Mul0, Mul1, Addend0, Addend1); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index d8d2a3a..febc198 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -120,6 +120,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -178,6 +179,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -186,13 +188,13 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) const; + const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, + LLVMContext &Context) const; }; } diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 5260258..dd90ea9 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -299,9 +299,8 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, unsigned XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> 
&Cond)const{ - // FIXME there should probably be a DebugLoc argument here - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL)const{ // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -310,11 +309,11 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, if (FBB == 0) { // One way branch. if (Cond.empty()) { // Unconditional branch - BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB); + BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB); } else { // Conditional branch. unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm()); - BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()) .addMBB(TBB); } return 1; @@ -323,9 +322,9 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, // Two-way Conditional branch. assert(Cond.size() == 2 && "Unexpected number of components!"); unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm()); - BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()) .addMBB(TBB); - BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB); + BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB); return 2; } @@ -357,37 +356,31 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - if (DestRC == SrcRC) { - if (DestRC == XCore::GRRegsRegisterClass) { - BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg) - .addReg(SrcReg) - .addImm(0); - return true; - } else { - return false; - } +void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool GRDest = XCore::GRRegsRegClass.contains(DestReg); + bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg); + + if (GRDest && GRSrc) { + BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + return; } - if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP && - DestRC == XCore::GRRegsRegisterClass) { - BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg) - .addImm(0); - return true; + if (GRDest && SrcReg == XCore::SP) { + BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0); + return; } - if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP && - SrcRC == XCore::GRRegsRegisterClass) { + + if (DestReg == XCore::SP && GRSrc) { BuildMI(MBB, I, DL, get(XCore::SETSP_1r)) - .addReg(SrcReg); - return true; + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return false; + llvm_unreachable("Impossible reg-to-reg copy"); } void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -438,8 +431,10 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Add the callee-saved register as live-in. It's killed at the spill. 
MBB.addLiveIn(it->getReg()); - storeRegToStackSlot(MBB, MI, it->getReg(), true, - it->getFrameIdx(), it->getRegClass(), &RI); + unsigned Reg = it->getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + storeRegToStackSlot(MBB, MI, Reg, true, + it->getFrameIdx(), RC, &RI); if (emitFrameMoves) { MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel); @@ -460,10 +455,11 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, --BeforeI; for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin(); it != CSI.end(); ++it) { - + unsigned Reg = it->getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(), - it->getRegClass(), &RI); + RC, &RI); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert multiple diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 9035ea9..e5b0171 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -58,17 +58,16 @@ public: bool AllowModify) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index dd3cbc1..19b9b1f 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -733,7 +733,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), // TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out, // in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp, // tsetmr, sext (reg), zext (reg) -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), "sext $dst, $src2", diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 0cfb358..2a88342 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -82,18 +82,6 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) return CalleeSavedRegs; } -const TargetRegisterClass* const* -XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, - XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, - XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, - XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass, - 0 - }; - return CalleeSavedRegClasses; -} - BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector 
Reserved(getNumRegs()); Reserved.set(XCore::CP); @@ -320,7 +308,7 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx; if (! isVarArg) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true); } else { FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 5bdd059..66132ba 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -44,9 +44,6 @@ public: const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index 37d7a00..abfa514 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -28,7 +28,7 @@ namespace { Hello() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - HelloCounter++; + ++HelloCounter; errs() << "Hello: "; errs().write_escaped(F.getName()) << '\n'; return false; @@ -46,7 +46,7 @@ namespace { Hello2() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - HelloCounter++; + ++HelloCounter; errs() << "Hello: "; errs().write_escaped(F.getName()) << '\n'; return false; diff --git a/lib/Transforms/Hello/Hello.exports b/lib/Transforms/Hello/Hello.exports new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/Transforms/Hello/Hello.exports diff --git a/lib/Transforms/Hello/Makefile b/lib/Transforms/Hello/Makefile index c5e75d4..f1e3148 100644 --- a/lib/Transforms/Hello/Makefile +++ b/lib/Transforms/Hello/Makefile @@ -12,5 +12,13 @@ LIBRARYNAME = LLVMHello LOADABLE_MODULE = 1 USEDLIBS = +# If we don't need RTTI or EH, there's no reason to export anything +# from the hello plugin. +ifneq ($(REQUIRES_RTTI), 1) +ifneq ($(REQUIRES_EH), 1) +EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports +endif +endif + include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 89f213e..28ea079 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -360,19 +360,20 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { IndicesVector Operands; for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); UI != E; ++UI) { + User *U = *UI; Operands.clear(); - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { if (LI->isVolatile()) return false; // Don't hack volatile loads Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. Operands.push_back(0); - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. 
getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); - // TODO: This runs the above loop over and over again for dead GEPS + // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? return isSafeToPromoteArgument(Arg, isByVal); @@ -452,12 +453,14 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > - I = idf_ext_begin(*PI, TranspBlocks), - E = idf_ext_end(*PI, TranspBlocks); I != E; ++I) + I = idf_ext_begin(P, TranspBlocks), + E = idf_ext_end(P, TranspBlocks); I != E; ++I) if (AA.canBasicBlockModify(**I, Arg, LoadSize)) return false; + } } // If the path from the entry of the function to each load is free of diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 692e47d..475eee8 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -120,9 +120,14 @@ namespace { typedef SmallVector<RetOrArg, 5> UseVector; + protected: + // DAH uses this to specify a different ID. + explicit DAE(void *ID) : ModulePass(ID) {} + public: static char ID; // Pass identification, replacement for typeid DAE() : ModulePass(&ID) {} + bool runOnModule(Module &M); virtual bool ShouldHackArguments() const { return false; } @@ -155,6 +160,8 @@ namespace { /// by bugpoint. struct DAH : public DAE { static char ID; + DAH() : DAE(&ID) {} + virtual bool ShouldHackArguments() const { return true; } }; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index b429213..735a1c4 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -160,13 +160,12 @@ static bool SafeToDestroyConstant(const Constant *C) { static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, SmallPtrSet<const PHINode*, 16> &PHIUsers) { for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { GS.HasNonInstructionUser = true; - if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; - - } else if (const Instruction *I = dyn_cast<Instruction>(*UI)) { + } else if (const Instruction *I = dyn_cast<Instruction>(U)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); if (GS.AccessingFunction == 0) @@ -221,18 +220,21 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa<CmpInst>(I)) { + // Nothing to analyse. 
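The `User *U = *UI;` hoists in the ArgumentPromotion and GlobalOpt hunks above recur throughout this commit (IPConstantPropagation, LowerSetJmp, StructRetPromotion, and the InstCombine files below): the use iterator is dereferenced exactly once into a named User, and every subsequent dyn_cast classifies that User rather than the iterator. The bare idiom, as a sketch:

#include "llvm/Instructions.h"
using namespace llvm;

static void classifyUses(Value *V) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    User *U = *UI;                              // dereference exactly once
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      (void)LI;                                 // ... a load of V ...
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      (void)SI;                                 // ... a store involving V ...
    }
  }
}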
} else if (isa<MemTransferInst>(I)) { - if (I->getOperand(1) == V) + const MemTransferInst *MTI = cast<MemTransferInst>(I); + if (MTI->getArgOperand(0) == V) GS.StoredType = GlobalStatus::isStored; - if (I->getOperand(2) == V) + if (MTI->getArgOperand(1) == V) GS.isLoaded = true; } else if (isa<MemSetInst>(I)) { - assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + assert(cast<MemSetInst>(I)->getArgOperand(0) == V && + "Memset only takes one pointer!"); GS.StoredType = GlobalStatus::isStored; } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast<Constant>(*UI)) { + } else if (const Constant *C = dyn_cast<Constant>(U)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!SafeToDestroyConstant(C)) @@ -242,6 +244,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // Otherwise must be some other user. return true; } + } return false; } @@ -1304,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, + NElems, 0, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); @@ -1323,8 +1326,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // if (F2) { free(F2); F2 = 0; } // } // The malloc can also fail if its argument is too large. - Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0); - Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1), + Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), ConstantZero, "isneg"); for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], @@ -1511,10 +1514,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is an allocation of a fixed size array of structs, analyze as a // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) + if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) AllocTy = AT->getElementType(); - + const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); if (!AllocSTy) return false; @@ -1533,7 +1536,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, - CI->getName()); + 0, CI->getName()); Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); @@ -1597,13 +1600,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GVElType->isFloatingPointTy() || GVElType->isPointerTy() || GVElType->isVectorTy()) return false; - + // Walk the use list of the global seeing if all the uses are load or store. // If there is anything else, bail out. 
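The getOperand(N) → getArgOperand(N-1) rewrites here, and throughout the InstCombine hunks further down, are one mechanical change: at the point of this commit a CallInst's operand list holds the callee followed by its arguments, so argument i lives at operand i+1. getArgOperand() hides that layout, and CallInst::ArgOffset (used later in the isFoldable hunk) is the same offset spelled as a named constant. A sketch of the equivalence, assuming that operand layout:

#include "llvm/Instructions.h"
#include <cassert>
using namespace llvm;

void inspectFirstArg(CallInst *CI) {
  Value *OldWay = CI->getOperand(1);      // layout-dependent (old style)
  Value *NewWay = CI->getArgOperand(0);   // argument 0, wherever it lives
  assert(OldWay == NewWay && "same operand reached either way");
}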
- for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) - if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) return false; - + } + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. @@ -1641,7 +1646,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // bool. Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); - // If we're already replaced the input, StoredVal will be a cast or + // If we've already replaced the input, StoredVal will be a cast or // select instruction. If not, it will be a load of the original // global. if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { @@ -2260,8 +2265,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, getVal(Values, CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { - InstResult = - ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), getVal(Values, SI->getOperand(1)), getVal(Values, SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { @@ -2302,7 +2306,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (!Callee) return false; // Cannot resolve. SmallVector<Constant*, 8> Formals; - for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); + CallSite CS(CI); + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) Formals.push_back(getVal(Values, *i)); diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index df2456f..e4db235 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -85,15 +85,16 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { unsigned NumNonconstant = 0; for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { + User *U = *UI; // Ignore blockaddress uses. - if (isa<BlockAddress>(*UI)) continue; + if (isa<BlockAddress>(U)) continue; // Used by a non-instruction, or not the callee of a function, do not // transform. - if (!isa<CallInst>(*UI) && !isa<InvokeInst>(*UI)) + if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) return false; - CallSite CS = CallSite::get(cast<Instruction>(*UI)); + CallSite CS = CallSite::get(cast<Instruction>(U)); if (!CS.isCallee(UI)) return false; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index b785bb0..027a220 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -468,7 +468,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. 
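The GlobalOpt evaluator hunk above swaps a raw `op_begin() + 1` for CallSite's arg_begin()/arg_end(), and LowerSetJmp below makes the same move; CallSite wraps either a CallInst or an InvokeInst and exposes only the argument range, so the callee operand can no longer be walked by accident. A hedged sketch:

#include "llvm/Support/CallSite.h"
using namespace llvm;

static void visitCallArgs(Instruction *I) {
  CallSite CS(I);              // wraps either a CallInst or an InvokeInst
  for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
       AI != AE; ++AI) {
    Value *Arg = *AI;          // arguments only; the callee is excluded
    (void)Arg;                 // ... inspect Arg here ...
  }
}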
if (SCC.isSingular()) { - std::swap(CallSites[CSi], CallSites.back()); + CallSites[CSi] = CallSites.back(); CallSites.pop_back(); } else { CallSites.erase(CallSites.begin()+CSi); diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index 4d61e83..76cfef8 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -42,6 +42,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Transforms/Utils/Local.h" @@ -262,8 +263,8 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) // char*. It returns "void", so it doesn't need to replace any of // Inst's uses and doesn't get a name. CastInst* CI = - new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst); - Value *Args[] = { CI, Inst->getOperand(2) }; + new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst); + Value *Args[] = { CI, Inst->getArgOperand(1) }; CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst); SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; @@ -378,7 +379,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext()); CastInst* BufPtr = - new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); + new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst); Value *Args[] = { GetSetJmpMap(Func), BufPtr, ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) @@ -405,12 +406,14 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // Loop over all of the uses of instruction. If any of them are after the // call, "spill" the value to the stack. for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != ABlock || - InstrsAfterCall.count(cast<Instruction>(*UI))) { + UI != E; ++UI) { + User *U = *UI; + if (cast<Instruction>(U)->getParent() != ABlock || + InstrsAfterCall.count(cast<Instruction>(U))) { DemoteRegToStack(*II); break; } + } InstrsAfterCall.clear(); // Change the setjmp call into a branch statement. We'll remove the @@ -473,7 +476,8 @@ void LowerSetJmp::visitCallInst(CallInst& CI) // Construct the new "invoke" instruction. TerminatorInst* Term = OldBB->getTerminator(); - std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end()); + CallSite CS(&CI); + std::vector<Value*> Params(CS.arg_begin(), CS.arg_end()); InvokeInst* II = InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], Params.begin(), Params.end(), CI.getName(), Term); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 622a9b5..55d5e2a 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -146,7 +146,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { switch(Ty1->getTypeID()) { default: llvm_unreachable("Unknown type!"); - // Fall through in Release-Asserts mode. + // Fall through in Release mode. case Type::IntegerTyID: case Type::OpaqueTyID: // Ty1 == Ty2 would have returned true earlier. 
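The Inliner hunk above is a micro-simplification of the swap-and-pop idiom: when the element being removed is dead anyway, assigning the last element over it and popping is enough, and neither variant preserves order. In isolation:

#include "llvm/ADT/SmallVector.h"
using namespace llvm;

template <typename T>
static void unorderedErase(SmallVectorImpl<T> &V, unsigned Idx) {
  V[Idx] = V.back(); // overwrite the dead slot; no swap needed
  V.pop_back();      // O(1), but element order is not preserved
}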
@@ -535,6 +535,7 @@ static LinkageCategory categorize(const Function *F) { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::ExternalWeakLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: return ExternalWeak; case GlobalValue::ExternalLinkage: diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 07525ea..6b9814c 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -66,13 +66,13 @@ Function* PartialInliner::unswitchFunction(Function* F) { return 0; // Clone the function, so that we can hack away on it. - DenseMap<const Value*, Value*> ValueMap; - Function* duplicateFunction = CloneFunction(F, ValueMap); + ValueMap<const Value*, Value*> VMap; + Function* duplicateFunction = CloneFunction(F, VMap); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); - BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]); - BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]); - BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]); + BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); + BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]); // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp index 084b94e..58e1448 100644 --- a/lib/Transforms/IPO/PartialSpecialization.cpp +++ b/lib/Transforms/IPO/PartialSpecialization.cpp @@ -32,6 +32,10 @@ using namespace llvm; STATISTIC(numSpecialized, "Number of specialized functions created"); +STATISTIC(numReplaced, "Number of callers replaced by specialization"); + +// Maximum number of arguments markable as interesting +static const int MaxInterests = 6; // Call must be used at least occasionally static const int CallsMin = 5; @@ -40,8 +44,9 @@ static const int CallsMin = 5; static const double ConstValPercent = .1; namespace { + typedef SmallVector<int, MaxInterests> InterestingArgVector; class PartSpec : public ModulePass { - void scanForInterest(Function&, SmallVector<int, 6>&); + void scanForInterest(Function&, InterestingArgVector&); int scanDistribution(Function&, int, std::map<Constant*, int>&); public : static char ID; // Pass identification, replacement for typeid @@ -59,13 +64,15 @@ X("partialspecialization", "Partial Specialization"); // a call to the specialized function. 
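PartialInlining's switch from DenseMap to ValueMap here (mirrored in PartialSpecialization) tracks a change in CloneFunction's signature: ValueMap installs callbacks so its keys stay coherent if a mapped Value is RAUW'd or deleted while the map is live. A sketch of the cloning pattern, assuming the post-change signature shown in the hunk:

#include "llvm/ADT/ValueMap.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

static Function *cloneForHacking(Function *F) {
  // Unlike DenseMap, ValueMap notices RAUW/deletion of its keys, so the
  // old->new mapping survives mutation of the clone.
  ValueMap<const Value*, Value*> VMap;
  Function *Clone = CloneFunction(F, VMap);        // fills VMap as it copies
  Clone->setLinkage(GlobalValue::InternalLinkage);
  F->getParent()->getFunctionList().push_back(Clone); // not auto-inserted
  return Clone;
}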
Returns the specialized function static Function* SpecializeFunction(Function* F, - DenseMap<const Value*, Value*>& replacements) { + ValueMap<const Value*, Value*>& replacements) { // arg numbers of deleted arguments - DenseSet<unsigned> deleted; - for (DenseMap<const Value*, Value*>::iterator + DenseMap<unsigned, const Argument*> deleted; + for (ValueMap<const Value*, Value*>::iterator repb = replacements.begin(), repe = replacements.end(); - repb != repe; ++repb) - deleted.insert(cast<Argument>(repb->first)->getArgNo()); + repb != repe; ++repb) { + Argument const *arg = cast<const Argument>(repb->first); + deleted[arg->getArgNo()] = arg; + } Function* NF = CloneFunction(F, replacements); NF->setLinkage(GlobalValue::InternalLinkage); @@ -80,9 +87,23 @@ SpecializeFunction(Function* F, if (CS.getCalledFunction() == F) { SmallVector<Value*, 6> args; - for (unsigned x = 0; x < CS.arg_size(); ++x) - if (!deleted.count(x)) - args.push_back(CS.getArgument(x)); + // Assemble the non-specialized arguments for the updated callsite. + // In the process, make sure that the specialized arguments are + // constant and match the specialization. If that's not the case, + // this callsite needs to call the original or some other + // specialization; don't change it here. + CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end(); + for (CallSite::arg_iterator ai = as; ai != ae; ++ai) { + DenseMap<unsigned, const Argument*>::iterator delit = deleted.find( + std::distance(as, ai)); + if (delit == deleted.end()) + args.push_back(cast<Value>(ai)); + else { + Constant *ci = dyn_cast<Constant>(ai); + if (!(ci && ci == replacements[delit->second])) + goto next_use; + } + } Value* NCall; if (CallInst *CI = dyn_cast<CallInst>(i)) { NCall = CallInst::Create(NF, args.begin(), args.end(), @@ -99,8 +120,11 @@ SpecializeFunction(Function* F, } CS.getInstruction()->replaceAllUsesWith(NCall); CS.getInstruction()->eraseFromParent(); + ++numReplaced; } } + next_use: + ; } return NF; } @@ -111,7 +135,7 @@ bool PartSpec::runOnModule(Module &M) { for (Module::iterator I = M.begin(); I != M.end(); ++I) { Function &F = *I; if (F.isDeclaration() || F.mayBeOverridden()) continue; - SmallVector<int, 6> interestingArgs; + InterestingArgVector interestingArgs; scanForInterest(F, interestingArgs); // Find the first interesting Argument that we can specialize on @@ -126,7 +150,7 @@ bool PartSpec::runOnModule(Module &M) { ee = distribution.end(); ii != ee; ++ii) if (total > ii->second && ii->first && ii->second > total * ConstValPercent) { - DenseMap<const Value*, Value*> m; + ValueMap<const Value*, Value*> m; Function::arg_iterator arg = F.arg_begin(); for (int y = 0; y < interestingArgs[x]; ++y) ++arg; @@ -143,7 +167,7 @@ bool PartSpec::runOnModule(Module &M) { /// scanForInterest - This function decides which arguments would be worth /// specializing on. 
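The rewritten call-site walk above indexes each argument by position with std::distance(as, ai), looks that position up in the deleted-argument map, and bails to the next_use label on any mismatch so the call site keeps calling the original. The indexing idiom by itself, as a sketch:

#include "llvm/Support/CallSite.h"
#include <iterator>
using namespace llvm;

static void walkArgsWithIndex(CallSite CS) {
  CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end();
  for (CallSite::arg_iterator ai = as; ai != ae; ++ai) {
    unsigned ArgNo = (unsigned)std::distance(as, ai); // position of *ai
    (void)ArgNo; // look ArgNo up in a map of specialized argument slots
  }
}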
-void PartSpec::scanForInterest(Function& F, SmallVector<int, 6>& args) { +void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); ii != ee; ++ii) { for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 6bc8e66..12e8db8 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -73,6 +73,19 @@ namespace { AU.setPreservesAll(); } }; + + class StripDeadDebugInfo : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDeadDebugInfo() + : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; } char StripSymbols::ID = 0; @@ -99,6 +112,14 @@ ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); } +char StripDeadDebugInfo::ID = 0; +static RegisterPass<StripDeadDebugInfo> +A("strip-dead-debug-info", "Strip debug info for unused symbols"); + +ModulePass *llvm::createStripDeadDebugInfoPass() { + return new StripDeadDebugInfo(); +} + /// OnlyUsedBy - Return true if V is only used by Usr. static bool OnlyUsedBy(Value *V, Value *Usr) { for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { @@ -223,27 +244,27 @@ static bool StripDebugInfo(Module &M) { Changed = true; } - NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); - if (NMD) { - Changed = true; - NMD->eraseFromParent(); - } - - NMD = M.getNamedMetadata("llvm.dbg.lv"); - if (NMD) { - Changed = true; - NMD->eraseFromParent(); + for (Module::named_metadata_iterator NMI = M.named_metadata_begin(), + NME = M.named_metadata_end(); NMI != NME;) { + NamedMDNode *NMD = NMI; + ++NMI; + if (NMD->getName().startswith("llvm.dbg.")) { + NMD->eraseFromParent(); + Changed = true; + } } - + unsigned MDDbgKind = M.getMDKindID("dbg"); - for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; - ++BI) + ++BI) { + Changed = true; // FIXME: Only set if there was debug metadata. BI->setMetadata(MDDbgKind, 0); + } - return true; + return Changed; } bool StripSymbols::runOnModule(Module &M) { @@ -266,8 +287,8 @@ bool StripDebugDeclare::runOnModule(Module &M) { if (Declare) { while (!Declare->use_empty()) { CallInst *CI = cast<CallInst>(Declare->use_back()); - Value *Arg1 = CI->getOperand(1); - Value *Arg2 = CI->getOperand(2); + Value *Arg1 = CI->getArgOperand(0); + Value *Arg2 = CI->getArgOperand(1); assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); if (Arg1->use_empty()) { @@ -295,3 +316,83 @@ bool StripDebugDeclare::runOnModule(Module &M) { return true; } + +/// getRealLinkageName - If special LLVM prefix that is used to inform the asm +/// printer to not emit usual symbol prefix before the symbol name is used then +/// return linkage name after skipping this special LLVM prefix. 
+static StringRef getRealLinkageName(StringRef LinkageName) { + char One = '\1'; + if (LinkageName.startswith(StringRef(&One, 1))) + return LinkageName.substr(1); + return LinkageName; +} + +bool StripDeadDebugInfo::runOnModule(Module &M) { + bool Changed = false; + + // Debugging information is encoded in LLVM IR using metadata. This is designed + // in such a way that debug info for symbols is preserved even if the symbols are + // optimized away by the optimizer. This special pass removes debug info for + // such symbols. + + // llvm.dbg.gv keeps track of debug info for global variables. + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { + SmallVector<MDNode *, 8> MDs; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + if (DIGlobalVariable(NMD->getOperand(i)).Verify()) + MDs.push_back(NMD->getOperand(i)); + else + Changed = true; + NMD->eraseFromParent(); + NMD = NULL; + + for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(), + E = MDs.end(); I != E; ++I) { + if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), + true)) { + if (!NMD) + NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addOperand(*I); + } + else + Changed = true; + } + } + + // llvm.dbg.sp keeps track of debug info for subprograms. + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) { + SmallVector<MDNode *, 8> MDs; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + if (DISubprogram(NMD->getOperand(i)).Verify()) + MDs.push_back(NMD->getOperand(i)); + else + Changed = true; + NMD->eraseFromParent(); + NMD = NULL; + + for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(), + E = MDs.end(); I != E; ++I) { + bool FnIsLive = false; + if (Function *F = DISubprogram(*I).getFunction()) + if (M.getFunction(F->getName())) + FnIsLive = true; + if (FnIsLive) { + if (!NMD) + NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); + NMD->addOperand(*I); + } else { + // Remove the llvm.dbg.lv.fnname named mdnode which may have been used + // to hold debug info for the dead function's local variables. + StringRef FName = DISubprogram(*I).getLinkageName(); + if (FName.empty()) + FName = DISubprogram(*I).getName(); + if (NamedMDNode *LVNMD = + M.getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(FName)))) + LVNMD->eraseFromParent(); + } + } + } + + return Changed; +} diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index 473e83c..a74686f 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -107,12 +107,12 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { // Check if it is ok to perform this promotion. if (isSafeToUpdateAllCallers(F) == false) { DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); - NumRejectedSRETUses++; + ++NumRejectedSRETUses; return 0; } DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); - NumSRET++; + ++NumSRET; // [1] Replace use of sret parameter AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", F->getEntryBlock().begin()); @@ -171,16 +171,16 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { // Check FirstArg's users. for (Value::use_iterator ArgI = FirstArg->use_begin(), ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { - + User *U = *ArgI; // If FirstArg user is a CallInst that does not correspond to current // call site then this function F is not suitable for sret promotion. 
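The new StripDeadDebugInfo pass above is wired up with the era's standard boilerplate: a static char ID whose address identifies the pass, a static RegisterPass object that provides the -strip-dead-debug-info command-line name, and a factory function. The skeleton in isolation (pass name hypothetical):

#include "llvm/Pass.h"
#include "llvm/Module.h"
using namespace llvm;

namespace {
  struct ExamplePass : public ModulePass {
    static char ID;                 // identity is the *address* of ID
    ExamplePass() : ModulePass(&ID) {}
    virtual bool runOnModule(Module &M) { return false; }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();         // this pass invalidates no analyses
    }
  };
}

char ExamplePass::ID = 0;
static RegisterPass<ExamplePass> X("example-pass", "An example module pass");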
- if (CallInst *CI = dyn_cast<CallInst>(ArgI)) { + if (CallInst *CI = dyn_cast<CallInst>(U)) { if (CI != Call) return false; } // If FirstArg user is a GEP whose all users are not LoadInst then // this function F is not suitable for sret promotion. - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(ArgI)) { + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // TODO : Use dom info and insert PHINodes to collect get results // from multiple call sites for this GEP. if (GEP->getParent() != Call->getParent()) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index c7b04a4..24e0528 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -178,7 +178,8 @@ public: Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitFree(Instruction &FI); + Instruction *visitMalloc(Instruction &FI); + Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 8586054..3f4a857 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1584,6 +1584,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if ((match(A, m_Not(m_Specific(B))) && match(D, m_Not(m_Specific(C))))) return BinaryOperator::CreateXor(C, B); + + // ((A|B)&1)|(B&-2) -> (A&1) | B + if (match(A, m_Or(m_Value(V1), m_Specific(B))) || + match(A, m_Or(m_Specific(B), m_Value(V1)))) { + Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C); + if (Ret) return Ret; + } + // (B&-2)|((A|B)&1) -> (A&1) | B + if (match(B, m_Or(m_Specific(A), m_Value(V1))) || + match(B, m_Or(m_Value(V1), m_Specific(A)))) { + Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D); + if (Ret) return Ret; + } } // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. 
@@ -1599,19 +1612,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); - if (Ret) return Ret; - } - // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); - if (Ret) return Ret; - } - // (~A | ~B) == (~(A & B)) - De Morgan's Law if (Value *Op0NotVal = dyn_castNotVal(Op0)) if (Value *Op1NotVal = dyn_castNotVal(Op1)) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 38e7b6e..85251a8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -112,8 +112,8 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); + unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0)); + unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1)); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -125,7 +125,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. - ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3)); + ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2)); if (MemOpLength == 0) return 0; // Source and destination pointer types are always "i8*" for intrinsic. See @@ -140,9 +140,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = - cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace(); + cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace(); unsigned DstAddrSp = - cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace(); + cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); @@ -154,8 +154,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // an i64 load+store, here because this improves the odds that the source or // dest address will be promotable. See if we can find a better type than the // integer datatype. 
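The visitOr change relocating these folds also tightens the patterns: inside the new location A and B are already bound, so the fully general m_Value matches are replaced with m_Specific, which only matches the exact value already captured, and each check issues two match() calls to cover both commutations of the or. The combinators in isolation, as a sketch:

#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true if V is (A | B) or (B | A) for the already-known B,
// capturing the other operand into A.
static bool matchOrWith(Value *V, Value *B, Value *&A) {
  return match(V, m_Or(m_Value(A), m_Specific(B))) ||
         match(V, m_Or(m_Specific(B), m_Value(A)));
}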
- Value *StrippedDest = MI->getOperand(1)->stripPointerCasts(); - if (StrippedDest != MI->getOperand(1)) { + Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); + if (StrippedDest != MI->getArgOperand(0)) { const Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { @@ -189,15 +189,15 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy); + Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign); InsertNewInstBefore(L, *MI); InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); + MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType())); return MI; } @@ -250,6 +250,8 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isFreeCall(&CI)) return visitFree(CI); + if (isMalloc(&CI)) + return visitMalloc(CI); // If the caller function is nounwind, mark the call as nounwind, even if the // callee isn't. @@ -261,7 +263,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); - + // Intrinsics cannot occur in an invoke, so handle them here instead of in // visitCallSite. if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { @@ -287,11 +289,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[3] = { CI.getOperand(1)->getType(), - CI.getOperand(2)->getType(), - CI.getOperand(3)->getType() }; - CI.setCalledFunction( - Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); + const Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); Changed = true; } } @@ -311,7 +312,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Instruction *I = SimplifyMemSet(MSI)) return I; } - + if (Changed) return II; } @@ -322,10 +323,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (!TD) break; const Type *ReturnTy = CI.getType(); - bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); // Get to the real allocated thing and offset as fast as possible. - Value *Op1 = II->getOperand(1)->stripPointerCasts(); + Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); // If we've stripped down to a single global variable that we // can know the size of then just return that. @@ -393,7 +394,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); return ReplaceInstUsesWith(CI, RetVal); - } // Do not return "I don't know" here. 
Later optimization passes could @@ -402,45 +402,45 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::bswap: // bswap(bswap(x)) -> x - if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) + if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) - return ReplaceInstUsesWith(CI, Operand->getOperand(1)); + return ReplaceInstUsesWith(CI, Operand->getArgOperand(0)); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) - if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) { + if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) { if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) { unsigned C = Operand->getType()->getPrimitiveSizeInBits() - TI->getType()->getPrimitiveSizeInBits(); Value *CV = ConstantInt::get(Operand->getType(), C); - Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); + Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV); return new TruncInst(V, TI->getType()); } } break; case Intrinsic::powi: - if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 if (Power->isZero()) return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); // powi(x, 1) -> x if (Power->isOne()) - return ReplaceInstUsesWith(CI, II->getOperand(1)); + return ReplaceInstUsesWith(CI, II->getArgOperand(0)); // powi(x, -1) -> 1/x if (Power->isAllOnesValue()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), - II->getOperand(1)); + II->getArgOperand(0)); } break; case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); @@ -453,11 +453,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. 
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); @@ -468,8 +468,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); - const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); @@ -513,19 +513,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // FALL THROUGH uadd into sadd case Intrinsic::sadd_with_overflow: // Canonicalize constants into the RHS. - if (isa<Constant>(II->getOperand(1)) && - !isa<Constant>(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); + if (isa<Constant>(II->getArgOperand(0)) && + !isa<Constant>(II->getArgOperand(1))) { + Value *LHS = II->getArgOperand(0); + II->setArgOperand(0, II->getArgOperand(1)); + II->setArgOperand(1, LHS); return II; } // X + undef -> undef - if (isa<UndefValue>(II->getOperand(2))) + if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X + 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { @@ -533,7 +533,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; @@ -541,38 +541,38 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ssub_with_overflow: // undef - X -> undef // X - undef -> undef - if (isa<UndefValue>(II->getOperand(1)) || - isa<UndefValue>(II->getOperand(2))) + if (isa<UndefValue>(II->getArgOperand(0)) || + isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X - 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), + UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. 
- if (isa<Constant>(II->getOperand(1)) && - !isa<Constant>(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); + if (isa<Constant>(II->getArgOperand(0)) && + !isa<Constant>(II->getArgOperand(1))) { + Value *LHS = II->getArgOperand(0); + II->setArgOperand(0, II->getArgOperand(1)); + II->setArgOperand(1, LHS); return II; } // X * undef -> undef - if (isa<UndefValue>(II->getOperand(2))) + if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) { + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X*0 -> {0, false} if (RHSI->isZero()) return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); @@ -580,11 +580,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X * 1 -> {X, false} if (RHSI->equalsInt(1)) { Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), + UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); + return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } break; @@ -595,8 +595,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_loadu_dq: // Turn PPC lvx -> load if the pointer is known aligned. // Turn X86 loadups -> load if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), + if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -604,22 +604,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { + if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) { const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(1)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); - return new StoreInst(II->getOperand(1), Ptr); + PointerType::getUnqual(II->getArgOperand(0)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + return new StoreInst(II->getArgOperand(0), Ptr); } break; case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { + if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) { const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(2)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); - return new StoreInst(II->getOperand(2), Ptr); + PointerType::getUnqual(II->getArgOperand(1)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); + return new StoreInst(II->getArgOperand(1), Ptr); } break; @@ -627,12 +627,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // These intrinsics only demands the 0th element of its input vector. If // we can simplify the input based on that, do so now. 
unsigned VWidth = - cast<VectorType>(II->getOperand(1)->getType())->getNumElements(); + cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); APInt DemandedElts(VWidth, 1); APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, + if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts)) { - II->setOperand(1, V); + II->setArgOperand(0, V); return II; } break; @@ -640,7 +640,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) { + if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); // Check that all of the elements are integer constants or undefs. @@ -655,8 +655,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); + Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType()); + Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -689,7 +689,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. - if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) { + if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { if (SS->getIntrinsicID() == Intrinsic::stacksave) { BasicBlock::iterator BI = SS; if (&*++BI == II) @@ -772,13 +772,13 @@ protected: NewInstruction = IC->ReplaceInstUsesWith(*CI, With); } bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) { + if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) { if (SizeCI->isAllOnesValue()) return true; if (isString) return SizeCI->getZExtValue() >= - GetStringLength(CI->getOperand(SizeArgOp)); - if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp))) + GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)); + if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset))) return SizeCI->getZExtValue() >= Arg->getZExtValue(); } return false; @@ -846,7 +846,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), CS.getInstruction()); - // If CS dues not return void then replaceAllUsesWith undef. + // If CS does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. 
if (!CS.getInstruction()->getType()->isVoidTy()) CS.getInstruction()-> @@ -1140,7 +1140,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { IntrinsicInst *Tramp = cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); - Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts()); + Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); @@ -1181,7 +1181,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { do { if (Idx == NestIdx) { // Add the chain argument and attributes. - Value *NestVal = Tramp->getOperand(3); + Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); NewArgs.push_back(NestVal); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index b0137c4..505a0bf 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -22,19 +22,18 @@ using namespace PatternMatch; /// X*Scale+Offset. /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, - int &Offset) { - assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!"); + uint64_t &Offset) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { Offset = CI->getZExtValue(); Scale = 0; - return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0); + return ConstantInt::get(Val->getType(), 0); } if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { // This is a value scaled by '1 << the shift amt'. - Scale = 1U << RHS->getZExtValue(); + Scale = UINT64_C(1) << RHS->getZExtValue(); Offset = 0; return I->getOperand(0); } @@ -100,7 +99,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; - int ArrayOffset; + uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); @@ -114,13 +113,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (Scale == 1) { Amt = NumElements; } else { - Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale); + Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale); // Insert before the alloca, not before the cast. 
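The DecomposeSimpleLinearExpr hunk above widens Scale and Offset to 64 bits, and the `1U << shift` → `UINT64_C(1) << shift` change is the load-bearing part: with a 32-bit 1U, a shift amount of 32 or more is undefined behavior, while the 64-bit constant keeps every shift up to 63 well defined. Reduced to a line:

#define __STDC_CONSTANT_MACROS   // C++03 needs this before <stdint.h>
#include <stdint.h>

uint64_t scaleOfShl(unsigned ShAmt) {
  // 1U << 40 would be UB (32-bit left operand); UINT64_C(1) is 64-bit.
  return UINT64_C(1) << ShAmt;   // well defined for ShAmt <= 63
}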
Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); } - if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { - Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()), + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { + Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 861cf92..6c00586 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1423,7 +1423,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, switch (II->getIntrinsicID()) { case Intrinsic::bswap: Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(0, II->getArgOperand(0)); ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap())); return &ICI; case Intrinsic::ctlz: @@ -1431,7 +1431,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // ctz(A) == bitwidth(a) -> A == 0 and likewise for != if (RHSV == RHS->getType()->getBitWidth()) { Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(0, II->getArgOperand(0)); ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0)); return &ICI; } @@ -1440,13 +1440,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // popcount(A) == 0 -> A == 0 and likewise for != if (RHS->isZero()) { Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(0, II->getArgOperand(0)); ICI.setOperand(1, RHS); return &ICI; } break; default: - break; + break; } } } @@ -1924,35 +1924,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } break; } - case Instruction::Call: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (isMalloc(LHSI) && LHSI->hasOneUse() && - isa<ConstantPointerNull>(RHSC)) { - // Need to explicitly erase malloc call here, instead of adding it to - // Worklist, because it won't get DCE'd from the Worklist since - // isInstructionTriviallyDead() returns false for function calls. - // It is OK to replace LHSI/MallocCall with Undef because the - // instruction that uses it will be erased via Worklist. - if (extractMallocCall(LHSI)) { - LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); - EraseInstFromFunction(*LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(I.getContext()), - !I.isTrueWhenEqual())); - } - if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) - if (MallocCall->hasOneUse()) { - MallocCall->replaceAllUsesWith( - UndefValue::get(MallocCall->getType())); - EraseInstFromFunction(*MallocCall); - Worklist.Add(LHSI); // The malloc's bitcast use. 
- return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(I.getContext()), - !I.isTrueWhenEqual())); - } - } - break; case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 0f2a24f..8933a0b 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -22,6 +23,18 @@ using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { + // Ensure that the alloca array size argument has type intptr_t, so that + // any casting is exposed early. + if (TD) { + const Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + if (AI.getArraySize()->getType() != IntPtrTy) { + Value *V = Builder->CreateIntCast(AI.getArraySize(), + IntPtrTy, false); + AI.setOperand(0, V); + return &AI; + } + } + // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { @@ -352,10 +365,11 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { return 0; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI)) + User *U = *UI; + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U)) return DI; - if (isa<BitCastInst>(UI) && UI->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin())) + if (isa<BitCastInst>(U) && U->hasOneUse()) { + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin())) return DI; } } @@ -511,17 +525,20 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); + BasicBlock *P = *PI; BasicBlock *OtherBB = 0; - if (*PI != StoreBB) - OtherBB = *PI; - ++PI; - if (PI == pred_end(DestBB)) + + if (P != StoreBB) + OtherBB = P; + + if (++PI == pred_end(DestBB)) return false; - if (*PI != StoreBB) { + P = *PI; + if (P != StoreBB) { if (OtherBB) return false; - OtherBB = *PI; + OtherBB = P; } if (++PI != pred_end(DestBB)) return false; diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 65f0393..f7fc62f 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -230,8 +230,9 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { bool isAddressTaken = false; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) { - if (isa<LoadInst>(UI)) continue; - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + User *U = *UI; + if (isa<LoadInst>(U)) continue; + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // If storing TO the alloca, then the address isn't taken. 
if (SI->getOperand(1) == AI) continue; } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index c958cde..c44fe9d 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -329,6 +329,37 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } + // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1 + // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1 + // FIXME: Type and constness constraints could be lifted, but we have to + // watch code size carefully. We should consider xor instead of + // sub/add when we decide to do that. + if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { + if (TrueVal->getType() == Ty) { + if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { + ConstantInt *C1 = NULL, *C2 = NULL; + if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { + C1 = dyn_cast<ConstantInt>(TrueVal); + C2 = dyn_cast<ConstantInt>(FalseVal); + } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) { + C1 = dyn_cast<ConstantInt>(FalseVal); + C2 = dyn_cast<ConstantInt>(TrueVal); + } + if (C1 && C2) { + // This shift results in either -1 or 0. + Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1); + + // Check if we can express the operation with a single or. + if (C2->isAllOnesValue()) + return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1)); + + Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue()); + return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1)); + } + } + } + } + if (CmpLHS == TrueVal && CmpRHS == FalseVal) { // Transform (X == Y) ? X : Y -> Y if (Pred == ICmpInst::ICMP_EQ) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 836bda3..e5ce8a6 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -404,7 +404,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){ bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); - Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS); + Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); return new ZExtInst(Cmp, II->getType()); } } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index cd41844..adf7a76 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -732,10 +732,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // the right place. 
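The new InstCombineSelect transform above rests on a classic bit trick: for 32-bit X, the arithmetic shift X >>s 31 is 0 when X is non-negative and all-ones when X is negative, so ANDing it with (C2 - C1) and adding C1 selects between the two constants without a branch; when C2 is all-ones the and/add pair collapses to the single 'or' the code special-cases. A standalone sketch, assuming >> on a negative int is an arithmetic shift (which the IR form guarantees and mainstream compilers provide):

    #include <cassert>
    #include <cstdint>

    static int32_t select_form(int32_t x, int32_t c1, int32_t c2) {
      return (x > -1) ? c1 : c2;
    }

    static int32_t branchless_form(int32_t x, int32_t c1, int32_t c2) {
      uint32_t mask = (uint32_t)(x >> 31);           // 0 or 0xFFFFFFFF
      uint32_t diff = (uint32_t)c2 - (uint32_t)c1;   // C2 - C1, wrapping
      return (int32_t)((mask & diff) + (uint32_t)c1);
    }

    int main() {
      const int32_t xs[] = { INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX };
      for (unsigned i = 0; i != 7; ++i)
        assert(select_form(xs[i], 10, 99) == branchless_form(xs[i], 10, 99));
      return 0;
    }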
Instruction *NewVal; if (InputBit > ResultBit) - NewVal = BinaryOperator::CreateLShr(I->getOperand(1), + NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0), ConstantInt::get(I->getType(), InputBit-ResultBit)); else - NewVal = BinaryOperator::CreateShl(I->getOperand(1), + NewVal = BinaryOperator::CreateShl(II->getArgOperand(0), ConstantInt::get(I->getType(), ResultBit-InputBit)); NewVal->takeName(I); return InsertNewInstBefore(NewVal, *I); @@ -1052,12 +1052,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_sse2_mul_sd: case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: - TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth+1); - if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts, + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, UndefElts2, Depth+1); - if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; } + if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } // If only the low elt is demanded and this is a scalarizable intrinsic, // scalarize it now. @@ -1069,8 +1069,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_sse2_sub_sd: case Intrinsic::x86_sse2_mul_sd: // TODO: Lower MIN/MAX/ABS/etc - Value *LHS = II->getOperand(1); - Value *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0); + Value *RHS = II->getArgOperand(1); // Extract the element as scalars. LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index af9ec5c..af2958f 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -710,8 +710,55 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return 0; } -Instruction *InstCombiner::visitFree(Instruction &FI) { - Value *Op = FI.getOperand(1); + + +static bool IsOnlyNullComparedAndFreed(const Value &V) { + for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (isFreeCall(U)) + continue; + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U)) + if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) + continue; + return false; + } + return true; +} + +Instruction *InstCombiner::visitMalloc(Instruction &MI) { + // If we have a malloc call which is only used in any amount of comparisons + // to null and free calls, delete the calls and replace the comparisons with + // true or false as appropriate. + if (IsOnlyNullComparedAndFreed(MI)) { + for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end(); + UI != UE;) { + // We can assume that every remaining use is a free call or an icmp eq/ne + // to null, so the cast is safe. + Instruction *I = cast<Instruction>(*UI); + + // Early increment here, as we're about to get rid of the user. + ++UI; + + if (isFreeCall(I)) { + EraseInstFromFunction(*cast<CallInst>(I)); + continue; + } + // Again, the cast is safe. 
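The visitMalloc/IsOnlyNullComparedAndFreed pair added above generalizes the two special cases removed elsewhere in this patch (the malloc-vs-null icmp fold and the malloc-then-free fold): an allocation whose only uses are free calls and equality comparisons against null can be deleted outright, with each comparison folded under the same optimistic assumption the old code made, namely that the allocation succeeded. A hypothetical source-level view of what the transform achieves, not code from the patch:

    #include <cstdlib>

    int was_allocated(std::size_t n) {
      void *p = std::malloc(n);    // used only by the icmp and free below
      int ok = (p != NULL);        // icmp ne null: folds via isFalseWhenEqual()
      std::free(p);                // erased together with the malloc
      return ok;                   // the function reduces to 'return 1'
    }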
+ ICmpInst *C = cast<ICmpInst>(I); + ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), + C->isFalseWhenEqual())); + EraseInstFromFunction(*C); + } + return EraseInstFromFunction(MI); + } + return 0; +} + + + +Instruction *InstCombiner::visitFree(CallInst &FI) { + Value *Op = FI.getArgOperand(0); // free undef -> unreachable. if (isa<UndefValue>(Op)) { @@ -726,23 +773,6 @@ Instruction *InstCombiner::visitFree(Instruction &FI) { if (isa<ConstantPointerNull>(Op)) return EraseInstFromFunction(FI); - // If we have a malloc call whose only use is a free call, delete both. - if (isMalloc(Op)) { - if (CallInst* CI = extractMallocCallFromBitCast(Op)) { - if (Op->hasOneUse() && CI->hasOneUse()) { - EraseInstFromFunction(FI); - EraseInstFromFunction(*CI); - return EraseInstFromFunction(*cast<Instruction>(Op)); - } - } else { - // Op is a call to malloc - if (Op->hasOneUse()) { - EraseInstFromFunction(FI); - return EraseInstFromFunction(*cast<Instruction>(Op)); - } - } - } - return 0; } @@ -896,7 +926,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which // allows us to simplify multiple result intrinsics to simpler things that - // just get one value.. + // just get one value. if (II->hasOneUse()) { // Check if we're grabbing the overflow bit or the result of a 'with // overflow' intrinsic. If it's the latter we can remove the intrinsic @@ -905,7 +935,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); II->replaceAllUsesWith(UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); @@ -914,7 +944,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); II->replaceAllUsesWith(UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateSub(LHS, RHS); @@ -923,7 +953,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: if (*EV.idx_begin() == 0) { // Normal result. 
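The visitExtractValueInst folds above drop a *.with.overflow intrinsic when only element 0 of its {result, overflow} pair is consumed, since that element is just the wrapping operation on the two arguments (now fetched with getArgOperand). A standalone analogue for the unsigned-add case:

    #include <cassert>
    #include <cstdint>

    // Models llvm.uadd.with.overflow.i32: {a + b wrapping, carry-out}.
    static uint32_t uadd_with_overflow(uint32_t a, uint32_t b, bool &ovf) {
      uint32_t r = a + b;    // unsigned addition wraps, like the intrinsic
      ovf = r < a;
      return r;
    }

    int main() {
      bool ignored;
      const uint32_t vals[] = { 0u, 1u, 0x7FFFFFFFu, 0xFFFFFFFFu };
      for (unsigned i = 0; i != 4; ++i)
        for (unsigned j = 0; j != 4; ++j)
          // With the overflow bit unused, the call is a plain add.
          assert(uadd_with_overflow(vals[i], vals[j], ignored) ==
                 vals[i] + vals[j]);
      return 0;
    }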
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); II->replaceAllUsesWith(UndefValue::get(II->getType())); EraseInstFromFunction(*II); return BinaryOperator::CreateMul(LHS, RHS); diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 5650150..41e3a39 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -143,7 +143,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); if (!std::binary_search(MST.begin(), MST.end(), edge)) { printEdgeCounter(edge,entry,i); - IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ Initializer[i++] = (Uncounted); @@ -166,7 +166,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); if (!std::binary_search(MST.begin(), MST.end(), edge)) { printEdgeCounter(edge,BB,i); - IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ Initializer[i++] = (Uncounted); @@ -189,11 +189,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block printEdgeCounter(edge,BB,i); - IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; } else { // Insert counter at the start of the block printEdgeCounter(edge,Succ,i); - IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++; + IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted; } Initializer[i++] = (Zero); } else { diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 8662a82..1a30e9b 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -61,8 +61,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); - Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), - "newargc", InsertPos); + CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), + "newargc", InsertPos); // If argc or argv are not available in main, just pass null values in. 
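Nearly every mechanical change in this patch is the same one-slot shift: with the callee stored in operand 0, argument i of a call sits at operand i + CallInst::ArgOffset, and getArgOperand(i)/setArgOperand(i) hide the bias, which is why the ProfilingUtils hunks that follow turn setOperand(2, ...) into setArgOperand(1, ...). A hypothetical minimal model of the indexing, not the real CallInst:

    struct FakeCallInst {
      static const unsigned ArgOffset = 1;   // slot 0 holds the callee
      void *Ops[8];
      void *getOperand(unsigned i) const { return Ops[i]; }
      void *getArgOperand(unsigned i) const { return Ops[i + ArgOffset]; }
      void setArgOperand(unsigned i, void *V) { Ops[i + ArgOffset] = V; }
    };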
Function::arg_iterator AI; @@ -73,10 +73,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, if (AI->getType() != ArgVTy) { Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, false); - InitCall->setOperand(2, + InitCall->setArgOperand(1, CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall)); } else { - InitCall->setOperand(2, AI); + InitCall->setArgOperand(1, AI); } /* FALL THROUGH */ @@ -93,12 +93,12 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } opcode = CastInst::getCastOpcode(AI, true, Type::getInt32Ty(Context), true); - InitCall->setOperand(1, + InitCall->setArgOperand(0, CastInst::Create(opcode, AI, Type::getInt32Ty(Context), "argc.cast", InitCall)); } else { AI->replaceAllUsesWith(InitCall); - InitCall->setOperand(1, AI); + InitCall->setArgOperand(0, AI); } case 0: break; diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp index 6135992..dcf14a6 100644 --- a/lib/Transforms/Scalar/ABCD.cpp +++ b/lib/Transforms/Scalar/ABCD.cpp @@ -230,7 +230,7 @@ class ABCD : public FunctionPass { DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin(); DenseMapIterator<Value*, MemoizedResultChart> end = map.end(); for (; begin != end; ++begin) { - begin->second.clear(); + begin->second.clear(); } map.clear(); } @@ -396,8 +396,8 @@ class ABCD : public FunctionPass { /// this case the method returns true, otherwise false. It also obtains the /// Instruction and ConstantInt from the BinaryOperator and returns it. bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2); + Instruction **I2, ConstantInt **C1, + ConstantInt **C2); /// This method creates a constraint between a Sigma and an Instruction. /// These constraints are created as soon as we find a comparator that uses a diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 5a49841..2d19467 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -83,7 +83,7 @@ bool ADCE::runOnFunction(Function& F) { for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) { - NumRemoved++; + ++NumRemoved; (*I)->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 93e9bfb..272066c 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -548,7 +548,8 @@ protected: CI->eraseFromParent(); } bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) + if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp + - CallInst::ArgOffset))) return SizeCI->isAllOnesValue(); return false; } @@ -559,7 +560,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // Lower all uses of llvm.objectsize.* IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II && II->getIntrinsicID() == Intrinsic::objectsize) { - bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); const Type *ReturnTy = CI->getType(); Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); CI->replaceAllUsesWith(RetVal); @@ -759,8 +760,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, } // Compute the constraint code and ConstraintType to use. 
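In the CodeGenPrepare hunk above, llvm.objectsize's extra argument selects which fallback constant to materialize when the object size is unknown: minimum mode yields 0, maximum mode yields all-ones, which is exactly the lowered 'Min ? 0 : -1ULL'. A one-function analogue of that fallback:

    #include <cstdint>

    // Fallback of llvm.objectsize when nothing is known statically:
    // 0 for "minimum remaining bytes", all-ones for the maximum mode.
    static uint64_t objectsize_unknown(bool min) {
      return min ? 0 : ~0ULL;   // matches 'Min ? 0 : -1ULL' in the patch
    }

Consumers treat the all-ones result as "no bound known", which is what the isFoldable helper above tests with isAllOnesValue().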
- TLI->ComputeConstraintToUse(OpInfo, SDValue(), - OpInfo.ConstraintType == TargetLowering::C_Memory); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 09c01d3..e047e4f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -56,7 +56,8 @@ namespace { } bool runOnBasicBlock(BasicBlock &BB); - bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep); + bool handleFreeWithNonTrivialDependency(const CallInst *F, + MemDepResult Dep); bool handleEndBlock(BasicBlock &BB); bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize, BasicBlock::iterator &BBI, @@ -73,7 +74,6 @@ namespace { AU.addRequired<AliasAnalysis>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addPreserved<DominatorTree>(); - AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } @@ -123,14 +123,15 @@ static Value *getPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) - return MI->getOperand(1); - - switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + return MI->getArgOperand(0); + + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { default: assert(false && "Unexpected intrinsic!"); case Intrinsic::init_trampoline: - return I->getOperand(1); + return II->getArgOperand(0); case Intrinsic::lifetime_end: - return I->getOperand(2); + return II->getArgOperand(1); } } @@ -147,12 +148,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { Len = MI->getLength(); } else { - switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { default: assert(false && "Unexpected intrinsic!"); case Intrinsic::init_trampoline: return -1u; case Intrinsic::lifetime_end: - Len = I->getOperand(1); + Len = II->getArgOperand(0); break; } } @@ -201,8 +203,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (InstDep.isNonLocal()) continue; // Handle frees whose dependencies are non-trivial. - if (isFreeCall(Inst)) { - MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep); + if (const CallInst *F = isFreeCall(Inst)) { + MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep); continue; } @@ -218,7 +220,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { isElidable(DepStore)) { // Delete the store and now-dead instructions that feed it. DeleteDeadInstruction(DepStore); - NumFastStores++; + ++NumFastStores; MadeChange = true; // DeleteDeadInstruction can delete the current instruction in loop @@ -249,7 +251,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. --BBI; - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; } @@ -270,7 +272,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. --BBI; - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; } @@ -287,7 +289,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { /// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose /// dependency is a store to a field of that structure. 
-bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) { +bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F, + MemDepResult Dep) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); Instruction *Dependency = Dep.getInst(); @@ -297,13 +300,13 @@ bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) { Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject(); // Check for aliasing. - if (AA.alias(F->getOperand(1), 1, DepPointer, 1) != + if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) != AliasAnalysis::MustAlias) return false; // DCE instructions only used to calculate that store DeleteDeadInstruction(Dependency); - NumFastStores++; + ++NumFastStores; return true; } @@ -349,9 +352,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (deadPointers.count(pointerOperand)) { // DCE instructions only used to calculate that store. Instruction *Dead = BBI; - BBI++; + ++BBI; DeleteDeadInstruction(Dead, &deadPointers); - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; } @@ -371,9 +374,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // However, if this load is unused and not volatile, we can go ahead and // remove it, and not have to worry about it making our pointer undead! if (L->use_empty() && !L->isVolatile()) { - BBI++; + ++BBI; DeleteDeadInstruction(L, &deadPointers); - NumFastOther++; + ++NumFastOther; MadeChange = true; continue; } @@ -391,9 +394,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Dead alloca's can be DCE'd when we reach them if (A->use_empty()) { - BBI++; + ++BBI; DeleteDeadInstruction(A, &deadPointers); - NumFastOther++; + ++NumFastOther; MadeChange = true; } @@ -426,9 +429,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { getPointerSize(*I)); if (A == AliasAnalysis::ModRef) - modRef++; + ++modRef; else - other++; + ++other; if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) dead.push_back(*I); @@ -442,9 +445,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } else if (isInstructionTriviallyDead(BBI)) { // For any non-memory-affecting non-terminators, DCE them as we reach them Instruction *Inst = BBI; - BBI++; + ++BBI; DeleteDeadInstruction(Inst, &deadPointers); - NumFastOther++; + ++NumFastOther; MadeChange = true; continue; } @@ -497,7 +500,7 @@ bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize, // Remove it! 
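The DSE hunks above lean repeatedly on the erase-while-iterating idiom (++BBI before DeleteDeadInstruction), and the new visitMalloc earlier in this patch does the same with its use iterator: advance past the element first, then delete it, so the loop never increments an invalidated iterator. The shape of the idiom on a std::list, as a sketch:

    #include <list>

    static void drop_even(std::list<int> &L) {
      for (std::list<int>::iterator I = L.begin(), E = L.end(); I != E;) {
        std::list<int>::iterator Cur = I++;   // early increment
        if (*Cur % 2 == 0)
          L.erase(Cur);   // Cur is now invalid; I already points past it
      }
    }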
++BBI; DeleteDeadInstruction(S, &deadPointers); - NumFastStores++; + ++NumFastStores; MadeChange = true; continue; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index ca8ab49..88b6776 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/PHITransAddr.h" @@ -271,7 +272,8 @@ Expression ValueTable::create_expression(CallInst* C) { e.function = C->getCalledFunction(); e.opcode = Expression::CALL; - for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); + CallSite CS(C); + for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); @@ -447,14 +449,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { if (local_dep.isDef()) { CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - if (local_cdep->getNumOperands() != C->getNumOperands()) { + if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); + for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i)); if (c_vn != cd_vn) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; @@ -504,13 +506,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { return nextValueNumber++; } - if (cdep->getNumOperands() != C->getNumOperands()) { + if (cdep->getNumArgOperands() != C->getNumArgOperands()) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { + uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i)); if (c_vn != cd_vn) { valueNumbering[C] = nextValueNumber; return nextValueNumber++; @@ -1500,7 +1502,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, MD->invalidateCachedPointerInfo(V); VN.erase(LI); toErase.push_back(LI); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1723,7 +1725,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, MD->invalidateCachedPointerInfo(V); VN.erase(LI); toErase.push_back(LI); - NumPRELoad++; + ++NumPRELoad; return true; } @@ -1784,7 +1786,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { MD->invalidateCachedPointerInfo(AvailVal); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1830,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { MD->invalidateCachedPointerInfo(StoredVal); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1860,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { MD->invalidateCachedPointerInfo(DepLI); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1871,7 +1873,7 @@ bool GVN::processLoad(LoadInst *L, 
SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(UndefValue::get(L->getType())); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } @@ -1882,7 +1884,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(UndefValue::get(L->getType())); VN.erase(L); toErase.push_back(L); - NumGVNLoad++; + ++NumGVNLoad; return true; } } @@ -2014,7 +2016,7 @@ bool GVN::runOnFunction(Function& F) { BasicBlock *BB = FI; ++FI; bool removedBlock = MergeBlockIntoPredecessor(BB, this); - if (removedBlock) NumGVNBlocks++; + if (removedBlock) ++NumGVNBlocks; Changed |= removedBlock; } @@ -2126,27 +2128,28 @@ bool GVN::performPRE(Function &F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) { + BasicBlock *P = *PI; // We're not interested in PRE where the block is its // own predecessor, or in blocks with predecessors // that are not reachable. - if (*PI == CurrentBlock) { + if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!localAvail.count(*PI)) { + } else if (!localAvail.count(P)) { NumWithout = 2; break; } DenseMap<uint32_t, Value*>::iterator predV = - localAvail[*PI]->table.find(ValNo); - if (predV == localAvail[*PI]->table.end()) { - PREPred = *PI; - NumWithout++; + localAvail[P]->table.find(ValNo); + if (predV == localAvail[P]->table.end()) { + PREPred = P; + ++NumWithout; } else if (predV->second == CurInst) { NumWithout = 2; } else { - predMap[*PI] = predV->second; - NumWith++; + predMap[P] = predV->second; + ++NumWith; } } @@ -2201,7 +2204,7 @@ bool GVN::performPRE(Function &F) { PREInstr->setName(CurInst->getName() + ".pre"); predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); - NumGVNPRE++; + ++NumGVNPRE; // Update the availability map to include the new instruction. localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); @@ -2211,8 +2214,10 @@ bool GVN::performPRE(Function &F) { CurInst->getName() + ".pre-phi", CurrentBlock->begin()); for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) - Phi->addIncoming(predMap[*PI], *PI); + PE = pred_end(CurrentBlock); PI != PE; ++PI) { + BasicBlock *P = *PI; + Phi->addIncoming(predMap[P], P); + } VN.add(Phi, ValNo); localAvail[CurrentBlock]->table[ValNo] = Phi; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 36bea67..b5c9dd8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -467,6 +467,17 @@ void IndVarSimplify::EliminateIVRemainders() { } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { + // If LoopSimplify form is not available, stay out of trouble. Some notes: + // - LSR currently only supports LoopSimplify-form loops. Indvars' + // canonicalization can be a pessimization without LSR to "clean up" + // afterwards. + // - We depend on having a preheader; in particular, + // Loop::getCanonicalInductionVariable only supports loops with preheaders, + // and we're in trouble if we can't find the induction variable even when + // we've manually inserted one. 
+ if (!L->isLoopSimplifyForm()) + return false; + IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); @@ -760,8 +771,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { bool UsedInLoop = false; for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) { - BasicBlock *UseBB = cast<Instruction>(UI)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(UI)) { + User *U = *UI; + BasicBlock *UseBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) { unsigned i = PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); UseBB = P->getIncomingBlock(i); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index df05b71..edce14c 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -18,6 +18,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -288,14 +289,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // Perhaps getConstantOnEdge should be smart enough to do this? for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. - Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB); + Constant *PredCst = LVI->getConstantOnEdge(V, P, BB); if (PredCst == 0 || (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst))) continue; - Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI)); + Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P)); } return !Result.empty(); @@ -345,8 +347,19 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { - Result.push_back(RHSVals[i]); - Result.back().first = InterestingVal; + // If we already inferred a value for this block on the LHS, don't + // re-add it. + bool HasValue = false; + for (unsigned r = 0, e = Result.size(); r != e; ++r) + if (Result[r].second == RHSVals[i].second) { + HasValue = true; + break; + } + + if (!HasValue) { + Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } } return !Result.empty(); } @@ -409,20 +422,21 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ (!isa<Instruction>(Cmp->getOperand(0)) || cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); - + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. 
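The ComputeValueKnownInPredecessors fix above stops the 'or'/'and' merge from emitting two entries for one predecessor when both operands infer a value for it; duplicate (value, block) pairs would confuse the threading logic downstream. The dedup is a linear scan, cheap at these sizes; its standalone shape:

    #include <utility>
    #include <vector>

    typedef std::pair<int, const void *> PredValue;  // (constant, pred block)

    static void add_if_new(std::vector<PredValue> &Result,
                           const PredValue &PV) {
      for (std::size_t r = 0, e = Result.size(); r != e; ++r)
        if (Result[r].second == PV.second)
          return;             // this predecessor already has a value
      Result.push_back(PV);
    }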
LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, *PI, BB); + RHSCst, P, BB); if (Res == LazyValueInfo::Unknown) continue; Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI)); + Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); } - + return !Result.empty(); } } @@ -538,18 +552,22 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition. pred_iterator PI = pred_begin(BB), E = pred_end(BB); if (isa<BranchInst>(BB->getTerminator())) { - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator())) if (PBI->isConditional() && PBI->getCondition() == Condition && - ProcessBranchOnDuplicateCond(*PI, BB)) + ProcessBranchOnDuplicateCond(P, BB)) return true; + } } else { assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator"); - for (; PI != E; ++PI) - if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator())) + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator())) if (PSI->getCondition() == Condition && - ProcessSwitchOnDuplicateCond(*PI, BB)) + ProcessSwitchOnDuplicateCond(P, BB)) return true; + } } } @@ -569,19 +587,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // If we have a comparison, loop over the predecessors to see if there is // a condition with a lexically identical value. pred_iterator PI = pred_begin(BB), E = pred_end(BB); - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) - if (PBI->isConditional() && *PI != BB) { + for (; PI != E; ++PI) { + BasicBlock *P = *PI; + if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator())) + if (PBI->isConditional() && P != BB) { if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { if (CI->getOperand(0) == CondCmp->getOperand(0) && CI->getOperand(1) == CondCmp->getOperand(1) && CI->getPredicate() == CondCmp->getPredicate()) { // TODO: Could handle things like (x != 4) --> (x == 17) - if (ProcessBranchOnDuplicateCond(*PI, BB)) + if (ProcessBranchOnDuplicateCond(P, BB)) return true; } } } + } } } @@ -869,9 +889,15 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Add all the unavailable predecessors to the PredsToSplit list. for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB); - PI != PE; ++PI) - if (!AvailablePredSet.count(*PI)) - PredsToSplit.push_back(*PI); + PI != PE; ++PI) { + BasicBlock *P = *PI; + // If the predecessor is an indirect goto, we can't split the edge. + if (isa<IndirectBrInst>(P->getTerminator())) + return false; + + if (!AvailablePredSet.count(P)) + PredsToSplit.push_back(P); + } // Split them out to their own block. UnavailablePred = @@ -903,11 +929,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // have multiple entries here. 
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { + BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), - std::make_pair(*PI, (Value*)0)); + std::make_pair(P, (Value*)0)); - assert(I != AvailablePreds.end() && I->first == *PI && + assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); PN->addIncoming(I->second, I->first); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 48817ab..e4894e9 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -83,7 +83,7 @@ bool LoopDeletion::IsLoopDead(Loop* L, if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; - BI++; + ++BI; } // Make sure that no instructions in the block have potential side-effects. @@ -176,7 +176,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast<PHINode>(BI)) { P->replaceUsesOfWith(exitingBlock, preheader); - BI++; + ++BI; } // Update the dominator tree and remove the instructions and blocks that will @@ -226,7 +226,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { LPM.deleteLoopFromQueue(L); Changed = true; - NumDeleted++; + ++NumDeleted; return Changed; } diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 101ff5b..31058e5 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -649,7 +649,7 @@ bool LoopIndexSplit::updateLoopIterationSpace() { } } } - NumRestrictBounds++; + ++NumRestrictBounds; return true; } @@ -958,11 +958,11 @@ bool LoopIndexSplit::splitLoop() { continue; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { + BI != BE; ++BI) { Instruction *Inst = BI; if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst) - && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) + && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) return false; } } @@ -1016,13 +1016,13 @@ bool LoopIndexSplit::splitLoop() { BSV = getMax(BSV, IVStartValue, Sign, PHTerm); // [*] Clone Loop - DenseMap<const Value *, Value *> ValueMap; - Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this); + ValueMap<const Value *, Value *> VMap; + Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this); Loop *ALoop = L; // [*] ALoop's exiting edge enters BLoop's header. // ALoop's original exit block becomes BLoop's exit block. - PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]); + PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]); BasicBlock *A_ExitingBlock = ExitCondition->getParent(); BranchInst *A_ExitInsn = dyn_cast<BranchInst>(A_ExitingBlock->getTerminator()); @@ -1047,7 +1047,7 @@ bool LoopIndexSplit::splitLoop() { for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), BE = ALoop->getHeader()->end(); BI != BE; ++BI) { if (PHINode *PN = dyn_cast<PHINode>(BI)) { - PHINode *PNClone = cast<PHINode>(ValueMap[PN]); + PHINode *PNClone = cast<PHINode>(VMap[PN]); InverseMap[PNClone] = PN; } else break; @@ -1085,11 +1085,11 @@ bool LoopIndexSplit::splitLoop() { // block. Remove incoming PHINode values from ALoop's exiting block. // Add new incoming values from BLoop's incoming exiting value. // Update BLoop exit block's dominator info.. 
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]); + BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]); for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end(); BI != BE; ++BI) { if (PHINode *PN = dyn_cast<PHINode>(BI)) { - PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)], + PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)], B_ExitingBlock); PN->removeIncomingValue(A_ExitingBlock); } else @@ -1131,7 +1131,7 @@ bool LoopIndexSplit::splitLoop() { removeBlocks(A_InactiveBranch, L, A_ActiveBranch); //[*] Eliminate split condition's inactive branch in from BLoop. - BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]); + BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]); BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator()); BasicBlock *B_InactiveBranch = NULL; BasicBlock *B_ActiveBranch = NULL; @@ -1146,9 +1146,9 @@ bool LoopIndexSplit::splitLoop() { //[*] Move exit condition into split condition block to avoid // executing dead loop iteration. - ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]); - Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]); - ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]); + ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]); + Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]); + ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]); moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition, cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, @@ -1159,7 +1159,7 @@ bool LoopIndexSplit::splitLoop() { B_SplitCondition, B_IndVar, B_IndVarIncrement, BLoop, EVOpNum); - NumIndexSplit++; + ++NumIndexSplit; return true; } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 5004483..16c4a15 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -147,7 +147,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { continue; // PHI nodes don't count. if (isa<DbgInfoIntrinsic>(OI)) continue; // Debug intrinsics don't count as size. - Size++; + ++Size; } if (Size > MAX_HEADER_SIZE) @@ -263,7 +263,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { preserveCanonicalLoopForm(LPM); - NumRotated++; + ++NumRotated; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 86ea3eb..a250a88 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -392,12 +392,13 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } -/// isMulSExtable - Return true if the given add can be sign-extended +/// isMulSExtable - Return true if the given mul can be sign-extended /// without changing its value. 
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) { +static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); - return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy)); + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(M->getType()) * M->getNumOperands()); + return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy)); } /// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined @@ -413,20 +414,28 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (LHS == RHS) return SE.getConstant(LHS->getType(), 1); - // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some - // folding. - if (RHS->isAllOnesValue()) - return SE.getMulExpr(LHS, RHS); + // Handle a few RHS special cases. + const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); + if (RC) { + const APInt &RA = RC->getValue()->getValue(); + // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do + // some folding. + if (RA.isAllOnesValue()) + return SE.getMulExpr(LHS, RC); + // Handle x /s 1 as x. + if (RA == 1) + return LHS; + } // Check for a division of a constant by a constant. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { - const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); if (!RC) return 0; - if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0) + const APInt &LA = C->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + if (LA.srem(RA) != 0) return 0; - return SE.getConstant(C->getValue()->getValue() - .sdiv(RC->getValue()->getValue())); + return SE.getConstant(LA.sdiv(RA)); } // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. @@ -440,6 +449,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (!Step) return 0; return SE.getAddRecExpr(Start, Step, AR->getLoop()); } + return 0; } // Distribute the sdiv over add operands, if the add doesn't overflow. @@ -455,10 +465,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, } return SE.getAddExpr(Ops); } + return 0; } // Check for a multiply operand that we can pull RHS out of. - if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) { if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) { SmallVector<const SCEV *, 4> Ops; bool Found = false; @@ -475,6 +486,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, } return Found ? SE.getMulExpr(Ops) : 0; } + return 0; + } // Otherwise we don't know. return 0; @@ -546,7 +559,7 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: - if (II->getOperand(1) == OperandVal) + if (II->getArgOperand(0) == OperandVal) isAddress = true; break; } @@ -568,7 +581,7 @@ static const Type *getAccessType(const Instruction *Inst) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: - AccessTy = II->getOperand(1)->getType(); + AccessTy = II->getArgOperand(0)->getType(); break; } } @@ -976,6 +989,8 @@ public: void dump() const; }; +} + /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. 
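Two LSR fixes above are worth spelling out. isMulSExtable now widens to bitwidth times the operand count because a product of k n-bit values can need up to k*n bits; the old '+1' was the rule for a two-operand add, not a mul. And getExactSDiv gains explicit RHS special cases plus the previously missing 'return 0' fallthroughs; its constant case only folds when the division is exact. A standalone sketch of that constant case, ignoring the INT64_MIN / -1 corner where even the source division overflows:

    #include <cstdint>

    // Returns true and sets Out only when LA is exactly divisible by RA.
    static bool exact_sdiv(int64_t LA, int64_t RA, int64_t &Out) {
      if (RA == 0)  return false;
      if (RA == -1) { Out = -LA; return true; }  // x /s -1 == x * -1
      if (RA == 1)  { Out = LA;  return true; }  // x /s 1  == x
      if (LA % RA != 0) return false;            // inexact: refuse to fold
      Out = LA / RA;
      return true;
    }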
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { @@ -1203,6 +1218,32 @@ static bool isAlwaysFoldable(const SCEV *S, return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); } +namespace { + +/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding +/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind. +struct UseMapDenseMapInfo { + static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() { + return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic); + } + + static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() { + return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic); + } + + static unsigned + getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) { + unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first); + Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second)); + return Result; + } + + static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS, + const std::pair<const SCEV *, LSRUse::KindType> &RHS) { + return LHS == RHS; + } +}; + /// FormulaSorter - This class implements an ordering for formulae which sorts /// the by their standalone cost. class FormulaSorter { @@ -1275,7 +1316,9 @@ class LSRInstance { } // Support for sharing of LSRUses between LSRFixups. - typedef DenseMap<const SCEV *, size_t> UseMapTy; + typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>, + size_t, + UseMapDenseMapInfo> UseMapTy; UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, @@ -1613,8 +1656,11 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { NewRHS = Sel->getOperand(1); else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); + else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS)) + NewRHS = SU->getValue(); else - llvm_unreachable("Max doesn't match expected pattern!"); + // Max doesn't match expected pattern. + return Cond; // Determine the new comparison opcode. It may be signed or unsigned, // and the original comparison may be either equality or inequality. @@ -1805,6 +1851,8 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, NewMaxOffset = NewOffset; } // Check for a mismatched access type, and fall back conservatively as needed. + // TODO: Be less conservative when the type is similar and can use the same + // addressing modes. if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) NewAccessTy = Type::getVoidTy(AccessTy->getContext()); @@ -1833,7 +1881,7 @@ LSRInstance::getUse(const SCEV *&Expr, } std::pair<UseMapTy::iterator, bool> P = - UseMap.insert(std::make_pair(Expr, 0)); + UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0)); if (!P.second) { // A use already existed with this base. 
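The UseMap change above widens the key from the expression alone to the (expression, use kind) pair, so two fixups that share a SCEV but need different kinds no longer get folded into one LSRUse; the new UseMapDenseMapInfo exists because DenseMap requires reserved empty/tombstone key values plus a hash and equality for the pair type. A sketch of the keying with std::map, which needs no such traits (enum spelled from the patch, otherwise hypothetical):

    #include <cstddef>
    #include <map>
    #include <utility>

    enum KindType { Basic, Special, Address, ICmpZero };
    typedef std::map<std::pair<const void *, KindType>, std::size_t> UseMapTy;

    // Returns the existing use index for (Expr, Kind), or registers Next.
    static std::size_t get_use(UseMapTy &M, const void *Expr, KindType Kind,
                               std::size_t Next) {
      std::pair<UseMapTy::iterator, bool> P =
          M.insert(std::make_pair(std::make_pair(Expr, Kind), Next));
      return P.first->second;   // insert() keeps the old entry if present
    }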
size_t LUIdx = P.first->second; @@ -1919,7 +1967,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); + Worklist.append(Add->op_begin(), Add->op_end()); } } while (!Worklist.empty()); } @@ -2086,7 +2134,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { const SCEV *S = Worklist.pop_back_val(); if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) - Worklist.insert(Worklist.end(), N->op_begin(), N->op_end()); + Worklist.append(N->op_begin(), N->op_end()); else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) Worklist.push_back(C->getOperand()); else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { @@ -2095,8 +2143,12 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (!Inserted.insert(U)) continue; const Value *V = U->getValue(); - if (const Instruction *Inst = dyn_cast<Instruction>(V)) + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + // Look for instructions defined outside the loop. if (L->contains(Inst)) continue; + } else if (isa<UndefValue>(V)) + // Undef doesn't have a live range, so it doesn't matter. + continue; for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const Instruction *UserInst = dyn_cast<Instruction>(*UI); @@ -2155,20 +2207,23 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// separate registers. If C is non-null, multiply each subexpression by C. static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, + SmallVectorImpl<const SCEV *> &UninterestingOps, + const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, SE); + CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), C, Ops, SE); - CollectSubexprs(AR->getStart(), C, Ops, SE); + AR->getLoop()), + C, Ops, UninterestingOps, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { @@ -2178,13 +2233,17 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, dyn_cast<SCEVConstant>(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, SE); + Ops, UninterestingOps, L, SE); return; } } - // Otherwise use the value itself. - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself. Loop-variant "unknown" values are + // uninteresting; we won't be able to do anything meaningful with them. + if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L)) + UninterestingOps.push_back(S); + else + Ops.push_back(C ? 
SE.getMulExpr(C, S) : S); } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -2198,8 +2257,15 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *BaseReg = Base.BaseRegs[i]; - SmallVector<const SCEV *, 8> AddOps; - CollectSubexprs(BaseReg, 0, AddOps, SE); + SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps; + CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE); + + // Add any uninteresting values as one register, as we won't be able to + // form any interesting reassociation opportunities with them. They'll + // just have to be added inside the loop no matter what we do. + if (!UninterestingAddOps.empty()) + AddOps.push_back(SE.getAddExpr(UninterestingAddOps)); + if (AddOps.size() == 1) continue; for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), @@ -2212,11 +2278,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, continue; // Collect all operands except *J. - SmallVector<const SCEV *, 8> InnerAddOps; - for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(), - KE = AddOps.end(); K != KE; ++K) - if (K != J) - InnerAddOps.push_back(*K); + SmallVector<const SCEV *, 8> InnerAddOps + ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); + InnerAddOps.append + (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end()); // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. @@ -2350,13 +2415,12 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, for (SmallSetVector<int64_t, 8>::const_iterator I = Factors.begin(), E = Factors.end(); I != E; ++I) { int64_t Factor = *I; - Formula F = Base; // Check that the multiplication doesn't overflow. - if (F.AM.BaseOffs == INT64_MIN && Factor == -1) + if (Base.AM.BaseOffs == INT64_MIN && Factor == -1) continue; - F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; - if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs) + int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; + if (NewBaseOffs / Factor != Base.AM.BaseOffs) continue; // Check that multiplying with the use offset doesn't overflow. @@ -2367,6 +2431,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, if (Offset / Factor != LU.MinOffset) continue; + Formula F = Base; + F.AM.BaseOffs = NewBaseOffs; + // Check that this scale is legal. if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) continue; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index ae7bf40..0c900ff 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -445,7 +445,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // This is a very ad-hoc heuristic. if (Metrics.NumInsts > Threshold || Metrics.NumBlocks * 5 > Threshold || - Metrics.NeverInline) { + Metrics.containsIndirectBr || Metrics.isRecursive) { DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << ", cost too high: " << currentLoop->getBlocks().size() << "\n"); @@ -457,21 +457,21 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { } // RemapInstruction - Convert the instruction operands from referencing the -// current values into those specified by ValueMap. +// current values into those specified by VMap. 
// static inline void RemapInstruction(Instruction *I, - DenseMap<const Value *, Value*> &ValueMap) { + ValueMap<const Value *, Value*> &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); - DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op); - if (It != ValueMap.end()) Op = It->second; + ValueMap<const Value *, Value*>::iterator It = VMap.find(Op); + if (It != VMap.end()) Op = It->second; I->setOperand(op, Op); } } /// CloneLoop - Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. -static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM, +static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM, LoopInfo *LI, LPPassManager *LPM) { Loop *New = new Loop(); LPM->insertLoop(New, PL); @@ -615,11 +615,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // the loop preheader and exit blocks), keeping track of the mapping between // the instructions and blocks. NewBlocks.reserve(LoopBlocks.size()); - DenseMap<const Value*, Value*> ValueMap; + ValueMap<const Value*, Value*> VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { - BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); NewBlocks.push_back(NewBB); - ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); } @@ -629,7 +629,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, NewBlocks[0], F->end()); // Now we create the new Loop object for the versioned loop. - Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM); + Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); Loop *ParentLoop = L->getParentLoop(); if (ParentLoop) { // Make sure to add the cloned preheader and exit blocks to the parent loop @@ -638,7 +638,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, } for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]); + BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]); // The new exit block should be in the same loop as the old one. if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); @@ -653,8 +653,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) { PN = cast<PHINode>(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); - DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V); - if (It != ValueMap.end()) V = It->second; + ValueMap<const Value *, Value*>::iterator It = VMap.find(V); + if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } } @@ -663,7 +663,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - RemapInstruction(I, ValueMap); + RemapInstruction(I, VMap); // Rewrite the original preheader to select between versions of the loop. 
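The LoopUnswitch hunks above swap DenseMap for ValueMap, but the remapping logic itself is unchanged: after cloning the loop body, every operand that has an entry in the clone map is redirected to its clone, while values defined outside the cloned region fall through untouched. A standalone shape of that walk:

    #include <cstddef>
    #include <map>
    #include <vector>

    typedef std::map<const int *, int *> CloneMap;  // original -> clone

    static void remap_operands(std::vector<int *> &Operands, CloneMap &VM) {
      for (std::size_t op = 0, e = Operands.size(); op != e; ++op) {
        CloneMap::iterator It = VM.find(Operands[op]);
        if (It != VM.end())
          Operands[op] = It->second;  // redirect to the cloned value
        // else: defined outside the cloned region; leave it alone
      }
    }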
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3611b8e..0e566c5 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -632,7 +632,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Remove the memcpy MD.removeInstruction(cpy); cpy->eraseFromParent(); - NumMemCpyInstr++; + ++NumMemCpyInstr; return true; } @@ -710,7 +710,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { if (MD.getDependency(C) == dep) { MD.removeInstruction(M); M->eraseFromParent(); - NumMemCpyInstr++; + ++NumMemCpyInstr; return true; } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 5aca9cdc..98452f5 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -407,13 +407,14 @@ static Value *NegateValue(Value *V, Instruction *BI) { // Okay, we need to materialize a negated version of V with an instruction. // Scan the use lists of V to see if we have one already. for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - if (!BinaryOperator::isNeg(*UI)) continue; + User *U = *UI; + if (!BinaryOperator::isNeg(U)) continue; // We found one! Now we have to make sure that the definition dominates // this use. We do this by moving it to the entry block (if it is a // non-instruction value) or right after the definition. These negates will // be zapped by reassociate later, so we don't need much finesse here. - BinaryOperator *TheNeg = cast<BinaryOperator>(*UI); + BinaryOperator *TheNeg = cast<BinaryOperator>(U); // Verify that the negate is in this function, V might be a constant expr. if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 5ca9ce3..dd445f6 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -926,7 +926,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, DeleteDeadInstructions(); AI->eraseFromParent(); - NumReplaced++; + ++NumReplaced; } /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, @@ -965,11 +965,11 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, isSafeGEP(GEPI, AI, GEPOffset, Info); if (!Info.isUnsafe) isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); if (Length) isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == 1, Info); + UI.getOperandNo() == CallInst::ArgOffset, Info); else MarkUnsafe(Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1272,6 +1272,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { + unsigned AddrSpace = + cast<PointerType>(OtherPtr->getType())->getAddressSpace(); // Remove bitcasts and all-zero GEPs from OtherPtr. 
This is an // optimization, but it's also required to detect the corner case where @@ -1279,20 +1281,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // OtherPtr may be a bitcast or GEP that currently being rewritten. (This // function is only called for mem intrinsics that access the whole // aggregate, so non-zero GEPs are not an issue here.) - while (1) { - if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) { - OtherPtr = BC->getOperand(0); - continue; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) { - // All zero GEPs are effectively bitcasts. - if (GEP->hasAllZeroIndices()) { - OtherPtr = GEP->getOperand(0); - continue; - } - } - break; - } + OtherPtr = OtherPtr->stripPointerCasts(); + // Copying the alloca to itself is a no-op: just delete it. if (OtherPtr == AI || OtherPtr == NewElts[0]) { // This code will run twice for a no-op memcpy -- once for each operand. @@ -1304,15 +1294,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, return; } - if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr)) - if (BCE->getOpcode() == Instruction::BitCast) - OtherPtr = BCE->getOperand(0); - // If the pointer is not the right type, insert a bitcast to the right // type. - if (OtherPtr->getType() != AI->getType()) - OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), - MI); + const Type *NewTy = + PointerType::get(AI->getType()->getElementType(), AddrSpace); + + if (OtherPtr->getType() != NewTy) + OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); } // Process each element of the aggregate. @@ -1373,7 +1361,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If the stored element is zero (common case), just store a null // constant. Constant *StoreVal; - if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) { if (CI->isZero()) { StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { @@ -1436,7 +1424,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? OtherElt : EltPtr, // Src ptr - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size // Align ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), MI->getVolatileCst() @@ -1451,8 +1439,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } else { assert(isa<MemSetInst>(MI)); Value *Ops[] = { - EltPtr, MI->getOperand(2), // Dest, Value, - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + EltPtr, MI->getArgOperand(1), // Dest, Value, + ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size Zero, // Align ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile }; @@ -1655,7 +1643,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); } - ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + // Don't create an 'or x, 0' on the first iteration. 
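// (Illustrative note, not part of the patch: ResultVal is built up field by
// field and, earlier in this function, starts out as a null constant; with
// the change below the first field is forwarded directly instead of emitting
// a dead "or SrcField, 0", while later fields are still OR'd into the
// running value.)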
+ if (!isa<Constant>(ResultVal) || + !cast<Constant>(ResultVal)->isNullValue()) + ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + else + ResultVal = SrcField; } // Handle tail padding by truncating the result @@ -1794,7 +1787,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != 1) return false; + if (UI.getOperandNo() != CallInst::ArgOffset) return false; // If the source of the memcpy/move is not a constant global, reject it. if (!PointsToConstantGlobal(MI->getSource())) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9744100..49d93a2 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -137,6 +137,9 @@ static bool MarkAliveBlocks(BasicBlock *BB, // they should be changed to unreachable by passes that can't modify the // CFG. if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + Value *Ptr = SI->getOperand(1); if (isa<UndefValue>(Ptr) || diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 7414be7..b1c6191 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -66,6 +66,11 @@ public: this->TD = TD; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + return CallOptimizer(CI->getCalledFunction(), CI, B); } }; @@ -92,6 +97,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { return true; } +/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + //===----------------------------------------------------------------------===// // String and Memory LibCall Optimizations //===----------------------------------------------------------------------===// @@ -110,8 +129,8 @@ struct StrCatOpt : public LibCallOptimization { return 0; // Extract some information from the instruction - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); // See if we can get the length of the input string. 
uint64_t Len = GetStringLength(Src); @@ -162,12 +181,12 @@ struct StrNCatOpt : public StrCatOpt { return 0; // Extract some information from the instruction - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); uint64_t Len; // We don't do anything if length is not constant - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else return 0; @@ -207,11 +226,11 @@ struct StrChrOpt : public LibCallOptimization { FT->getParamType(0) != FT->getReturnType()) return 0; - Value *SrcStr = CI->getOperand(1); + Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2)); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); if (CharC == 0) { // These optimizations require TargetData. if (!TD) return 0; @@ -220,7 +239,7 @@ struct StrChrOpt : public LibCallOptimization { if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; - return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), B, TD); } @@ -260,12 +279,12 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -308,19 +327,19 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); // Get the length argument if it is constant. 
uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else return 0; @@ -328,6 +347,9 @@ struct StrNCmpOpt : public LibCallOptimization { if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); @@ -365,7 +387,7 @@ struct StrCpyOpt : public LibCallOptimization { FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; @@ -381,7 +403,7 @@ struct StrCpyOpt : public LibCallOptimization { if (OptChkCall) EmitMemCpyChk(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getOperand(3), B, TD); + CI->getArgOperand(2), B, TD); else EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), @@ -402,9 +424,9 @@ struct StrNCpyOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); - Value *LenOp = CI->getOperand(3); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); @@ -452,7 +474,7 @@ struct StrLenOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy()) return 0; - Value *Src = CI->getOperand(1); + Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 if (uint64_t Len = GetStringLength(Src)) @@ -477,7 +499,7 @@ struct StrToOpt : public LibCallOptimization { !FT->getParamType(1)->isPointerTy()) return 0; - Value *EndPtr = CI->getOperand(2); + Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { CI->setOnlyReadsMemory(); CI->addAttribute(1, Attribute::NoCapture); @@ -500,17 +522,34 @@ struct StrStrOpt : public LibCallOptimization { return 0; // fold strstr(x, x) -> x. - if (CI->getOperand(1) == CI->getOperand(2)) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD); + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + Old->replaceAllUsesWith(Cmp); + Old->eraseFromParent(); + } + return CI; + } // See if either input string is a constant string. std::string SearchStr, ToFindStr; - bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); - bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); + bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr); // fold strstr(x, "") -> x. 
if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { @@ -520,14 +559,14 @@ struct StrStrOpt : public LibCallOptimization { return Constant::getNullValue(CI->getType()); // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getOperand(1), B); + Value *Result = CastToCStr(CI->getArgOperand(0), B); Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD), + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), CI->getType()); return 0; } @@ -545,13 +584,13 @@ struct MemCmpOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy(32)) return 0; - Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); // Make sure we have a constant length. - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3)); + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); if (!LenC) return 0; uint64_t Len = LenC->getZExtValue(); @@ -598,9 +637,9 @@ struct MemCpyOpt : public LibCallOptimization { return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -620,9 +659,9 @@ struct MemMoveOpt : public LibCallOptimization { return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -642,10 +681,10 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), + Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); - return CI->getOperand(1); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + return CI->getArgOperand(0); } }; @@ -666,7 +705,7 @@ struct PowOpt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 return Op1C; @@ -720,18 +759,18 @@ struct Exp2Opt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) LdExpArg = 
B.CreateSExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + Type::getInt32Ty(*Context), "tmp"); } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) LdExpArg = B.CreateZExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + Type::getInt32Ty(*Context), "tmp"); } if (LdExpArg) { @@ -772,7 +811,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { return 0; // If this is something like 'floor((double)floatval)', convert to floorf. - FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)); + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) return 0; @@ -797,11 +836,11 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || !FT->getParamType(0)->isIntegerTy()) return 0; - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); // Constant fold. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { @@ -821,7 +860,7 @@ struct FFSOpt : public LibCallOptimization { Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); return B.CreateSelect(Cond, V, - ConstantInt::get(Type::getInt32Ty(*Context), 0)); + ConstantInt::get(Type::getInt32Ty(*Context), 0)); } }; @@ -837,7 +876,7 @@ struct IsDigitOpt : public LibCallOptimization { return 0; // isdigit(c) -> (c-'0') <u 10 - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'), "isdigittmp"); Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10), @@ -858,7 +897,7 @@ struct IsAsciiOpt : public LibCallOptimization { return 0; // isascii(c) -> c <u 128 - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128), "isascii"); return B.CreateZExt(Op, CI->getType()); @@ -877,7 +916,7 @@ struct AbsOpt : public LibCallOptimization { return 0; // abs(x) -> x >s -1 ? x : -x - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos"); @@ -899,7 +938,7 @@ struct ToAsciiOpt : public LibCallOptimization { return 0; // isascii(c) -> c & 0x7f - return B.CreateAnd(CI->getOperand(1), + return B.CreateAnd(CI->getArgOperand(0), ConstantInt::get(CI->getType(),0x7F)); } }; @@ -922,7 +961,7 @@ struct PrintFOpt : public LibCallOptimization { // Check for a fixed format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(1), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) return 0; // Empty format string -> noop. @@ -954,20 +993,20 @@ struct PrintFOpt : public LibCallOptimization { } // Optimize specific format strings. 
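// A source-level view of the character-class folds above (an illustrative
// sketch, not from the patch; it assumes the usual int-taking C prototypes,
// with "<u" in the comments above meaning unsigned comparison):
int isdigit_fold(int c) { return (unsigned)(c - '0') < 10u; } // isdigit(c)
int isascii_fold(int c) { return (unsigned)c < 128u; }        // isascii(c)
int toascii_fold(int c) { return c & 0x7f; }                  // toascii(c)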
- // printf("%c", chr) --> putchar(*(i8*)dst) - if (FormatStr == "%c" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getOperand(2), B, TD); + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) - if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isPointerTy() && + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy() && CI->use_empty()) { - EmitPutS(CI->getOperand(2), B, TD); + EmitPutS(CI->getArgOperand(1), B, TD); return CI; } return 0; @@ -988,11 +1027,11 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { // Make sure there's no % in the constant array. We could try to handle // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1003,7 +1042,7 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); @@ -1011,16 +1050,17 @@ struct SPrintFOpt : public LibCallOptimization { // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. 
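// (Illustrative summary, not from the patch, of the two cases decoded below,
// in source terms:
//   sprintf(dst, "%c", chr);  // --> dst[0] = chr; dst[1] = '\0'; returns 1
//   sprintf(dst, "%s", str);  // --> memcpy(dst, str, strlen(str)+1);
//                             //     returns strlen(str)
// only the "%s" case needs TargetData, for the intptr-sized copy length.)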
if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - Value *V = B.CreateTrunc(CI->getOperand(3), + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), Type::getInt8Ty(*Context), "char"); - Value *Ptr = CastToCStr(CI->getOperand(1), B); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), "nul"); @@ -1034,13 +1074,13 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getOperand(3)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getOperand(3), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1064,8 +1104,8 @@ struct FWriteOpt : public LibCallOptimization { return 0; // Get the element size and count. - ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2)); - ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3)); + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); if (!SizeC || !CountC) return 0; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); @@ -1075,8 +1115,8 @@ struct FWriteOpt : public LibCallOptimization { // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); - EmitFPutC(Char, CI->getOperand(4), B, TD); + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + EmitFPutC(Char, CI->getArgOperand(3), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1100,11 +1140,11 @@ struct FPutsOpt : public LibCallOptimization { return 0; // fputs(s,F) --> fwrite(s,1,strlen(s),F) - uint64_t Len = GetStringLength(CI->getOperand(1)); + uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getOperand(1), + EmitFWrite(CI->getArgOperand(0), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getOperand(2), B, TD); + CI->getArgOperand(1), B, TD); return CI; // Known to have no uses (see above). } }; @@ -1123,11 +1163,11 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return 0; // We found a format specifier. @@ -1135,31 +1175,32 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. 
if (!TD) return 0; - EmitFWrite(CI->getOperand(2), + EmitFWrite(CI->getArgOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getOperand(1), B, TD); + CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. if (FormatStr[1] == 'c') { - // fprintf(F, "%c", chr) --> *(i8*)dst = chr - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD); + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), 1); } if (FormatStr[1] == 's') { - // fprintf(F, "%s", str) -> fputs(str, F) - if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty()) + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD); + EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return CI; } return 0; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 2306a77..9208238 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -206,12 +206,13 @@ static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, // there is only one other pred, get it, otherwise we can't handle it. PI = pred_begin(DstBlock); PE = pred_end(DstBlock); BasicBlock *DstOtherPred = 0; - if (*PI == SrcBlock) { + BasicBlock *P = *PI; + if (P == SrcBlock) { if (++PI == PE) return 0; DstOtherPred = *PI; if (++PI != PE) return 0; } else { - DstOtherPred = *PI; + DstOtherPred = P; if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5ad5de2..01c8e5d 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -16,9 +16,9 @@ // transformation from taking place, though currently the analysis cannot // support moving any really useful instructions (only dead ones). // 2. This pass transforms functions that are prevented from being tail -// recursive by an associative expression to use an accumulator variable, -// thus compiling the typical naive factorial or 'fib' implementation into -// efficient code. +// recursive by an associative and commutative expression to use an +// accumulator variable, thus compiling the typical naive factorial or +// 'fib' implementation into efficient code. // 3. TRE is performed if the function returns void, if the return // returns the result returned by the call, or if the function returns a // run-time constant on all exits from the function. 
It is possible, though @@ -60,6 +60,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" @@ -252,7 +253,7 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { // If we are passing this argument into call as the corresponding // argument operand, then the argument is dynamically constant. // Otherwise, we cannot transform this function safely. - if (CI->getOperand(ArgNo+1) == Arg) + if (CI->getArgOperand(ArgNo) == Arg) return true; } @@ -269,16 +270,16 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { } // getCommonReturnValue - Check to see if the function containing the specified -// return instruction and tail call consistently returns the same -// runtime-constant value at all exit points. If so, return the returned value. +// tail call consistently returns the same runtime-constant value at all exit +// points except for IgnoreRI. If so, return the returned value. // -static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { - Function *F = TheRI->getParent()->getParent(); +static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { + Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) - if (RI != TheRI) { + if (RI != IgnoreRI) { Value *RetOp = RI->getOperand(0); // We can only perform this transformation if the value returned is @@ -301,9 +302,9 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { /// Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { - if (!I->isAssociative()) return 0; + if (!I->isAssociative() || !I->isCommutative()) return 0; assert(I->getNumOperands() == 2 && - "Associative operations should have 2 args!"); + "Associative/commutative operations should have 2 args!"); // Exactly one operand should be the result of the call instruction... if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || @@ -368,11 +369,16 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, return false; } - // If we are introducing accumulator recursion to eliminate associative - // operations after the call instruction, this variable contains the initial - // value for the accumulator. If this value is set, we actually perform - // accumulator recursion elimination instead of simple tail recursion - // elimination. + // If we are introducing accumulator recursion to eliminate operations after + // the call instruction that are both associative and commutative, the initial + // value for the accumulator is placed in this variable. If this value is set + // then we actually perform accumulator recursion elimination instead of + // simple tail recursion elimination. If the operation is an LLVM instruction + // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then + // we are handling the case when the return instruction returns a constant C + // which is different to the constant returned by other return instructions + // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a + // special case of accumulator recursion, the operation being "return C". 
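// A worked illustration of the comment above (assumed C source, not from the
// patch). The classic non-tail-recursive factorial
//
//   int fact(int n) { return n <= 1 ? 1 : fact(n - 1) * n; }
//
// multiplies after the recursive call, so plain TRE is blocked; because the
// multiply is associative and commutative, the pass accumulates the pending
// products instead, producing in effect:
//
//   int fact(int n) {
//     int accumulator = 1;   // AccumulatorRecursionEliminationInitVal
//     for (; n > 1; --n)
//       accumulator *= n;    // the accumulator recursion instruction
//     return accumulator;
//   }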
Value *AccumulatorRecursionEliminationInitVal = 0; Instruction *AccumulatorRecursionInstr = 0; @@ -383,9 +389,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) if (!CanMoveAboveCall(BBI, CI)) { // If we can't move the instruction above the call, it might be because it - // is an associative operation that could be tranformed using accumulator - // recursion elimination. Check to see if this is the case, and if so, - // remember the initial accumulator value for later. + // is an associative and commutative operation that could be transformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = CanTransformAccumulatorRecursion(BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction @@ -403,8 +409,18 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == 0 && - !getCommonReturnValue(Ret, CI)) - return false; + !getCommonReturnValue(0, CI)) { + // One case remains that we are able to handle: the current return + // instruction returns a constant, and all other return instructions + // return a different constant. + if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) + return false; // Current return instruction does not return a constant. + // Check that all other return instructions return a common constant. If + // so, record it in AccumulatorRecursionEliminationInitVal. + AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); + if (!AccumulatorRecursionEliminationInitVal) + return false; + } // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. @@ -453,8 +469,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. - for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i) - ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB); + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion @@ -464,8 +480,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. - PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr", - OldEntry->begin()); + PHINode *AccPN = + PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, @@ -475,20 +492,27 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // it will not show up as a predecessor.
for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); PI != PE; ++PI) { - if (*PI == &F->getEntryBlock()) - AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI); + BasicBlock *P = *PI; + if (P == &F->getEntryBlock()) + AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); else - AccPN->addIncoming(AccPN, *PI); + AccPN->addIncoming(AccPN, P); } - // Add an incoming argument for the current block, which is computed by our - // associative accumulator instruction. - AccPN->addIncoming(AccRecInstr, BB); - - // Next, rewrite the accumulator recursion instruction so that it does not - // use the result of the call anymore, instead, use the PHI node we just - // inserted. - AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + if (AccRecInstr) { + // Add an incoming argument for the current block, which is computed by + // our associative and commutative accumulator instruction. + AccPN->addIncoming(AccRecInstr, BB); + + // Next, rewrite the accumulator recursion instruction so that it does not + // use the result of the call anymore, instead, use the PHI node we just + // inserted. + AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + } else { + // Add an incoming argument for the current block, which is just the + // constant returned by the current return instruction. + AccPN->addIncoming(Ret->getReturnValue(), BB); + } // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index ea9d1c1..4d64c85 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -381,29 +381,28 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI) { std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints(); - - unsigned ArgNo = 1; // ArgNo - The operand of the CallInst. + + unsigned ArgNo = 0; // The argument of the CallInst. for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo OpInfo(Constraints[i]); - + // Compute the value type for each operand. switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.isIndirect) - OpInfo.CallOperandVal = CI->getOperand(ArgNo++); + OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); break; case InlineAsm::isInput: - OpInfo.CallOperandVal = CI->getOperand(ArgNo++); + OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); break; case InlineAsm::isClobber: // Nothing to do. break; } - + // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue(), - OpInfo.ConstraintType == TargetLowering::C_Memory); - + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! if (OpInfo.CallOperandVal == OpVal && @@ -411,7 +410,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, !OpInfo.isIndirect)) return false; } - + return true; } @@ -450,7 +449,7 @@ static bool FindAllMemoryUses(Instruction *I, if (CallInst *CI = dyn_cast<CallInst>(U)) { InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); - if (IA == 0) return true; + if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. 
if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2f1ae00..ec625b4 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -558,121 +558,3 @@ void llvm::FindFunctionBackedges(const Function &F, } - - - -/// AreEquivalentAddressValues - Test if A and B will obviously have the same -/// value. This includes recognizing that %t0 and %t1 will have the same -/// value in code like this: -/// %t0 = getelementptr \@a, 0, 3 -/// store i32 0, i32* %t0 -/// %t1 = getelementptr \@a, 0, 3 -/// %t2 = load i32* %t1 -/// -static bool AreEquivalentAddressValues(const Value *A, const Value *B) { - // Test if the values are trivially equivalent. - if (A == B) return true; - - // Test if the values come from identical arithmetic instructions. - // Use isIdenticalToWhenDefined instead of isIdenticalTo because - // this function is only used when one address use dominates the - // other, which means that they'll always either have the same - // value or one of them will have an undefined value. - if (isa<BinaryOperator>(A) || isa<CastInst>(A) || - isa<PHINode>(A) || isa<GetElementPtrInst>(A)) - if (const Instruction *BI = dyn_cast<Instruction>(B)) - if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) - return true; - - // Otherwise they may not be equivalent. - return false; -} - -/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the -/// instruction before ScanFrom) checking to see if we have the value at the -/// memory address *Ptr locally available within a small number of instructions. -/// If the value is available, return it. -/// -/// If not, return the iterator for the last validated instruction that the -/// value would be live through. If we scanned the entire block and didn't find -/// something that invalidates *Ptr or provides it, ScanFrom would be left at -/// begin() and this returns null. ScanFrom could also be left -/// -/// MaxInstsToScan specifies the maximum instructions to scan in the block. If -/// it is set to 0, it will scan the whole block. You can also optionally -/// specify an alias analysis implementation, which makes this more precise. -Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, - BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan, - AliasAnalysis *AA) { - if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; - - // If we're using alias analysis to disambiguate get the size of *Ptr. - unsigned AccessSize = 0; - if (AA) { - const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); - AccessSize = AA->getTypeStoreSize(AccessTy); - } - - while (ScanFrom != ScanBB->begin()) { - // We must ignore debug info directives when counting (otherwise they - // would affect codegen). - Instruction *Inst = --ScanFrom; - if (isa<DbgInfoIntrinsic>(Inst)) - continue; - - // Restore ScanFrom to expected value in case next test succeeds - ScanFrom++; - - // Don't scan huge blocks. - if (MaxInstsToScan-- == 0) return 0; - - --ScanFrom; - // If this is a load of Ptr, the loaded value is available. - if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) - return LI; - - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // If this is a store through Ptr, the value is available! 
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) - return SI->getOperand(0); - - // If Ptr is an alloca and this is a store to a different alloca, ignore - // the store. This is a trivial form of alias analysis that is important - // for reg2mem'd code. - if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) && - (isa<AllocaInst>(SI->getOperand(1)) || - isa<GlobalVariable>(SI->getOperand(1)))) - continue; - - // If we have alias analysis and it says the store won't modify the loaded - // value, ignore the store. - if (AA && - (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) - continue; - - // Otherwise the store that may or may not alias the pointer, bail out. - ++ScanFrom; - return 0; - } - - // If this is some other instruction that may clobber Ptr, bail out. - if (Inst->mayWriteToMemory()) { - // If alias analysis claims that it really won't modify the load, - // ignore it. - if (AA && - (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) - continue; - - // May modify the pointer, bail out. - ++ScanFrom; - return 0; - } - } - - // Got to the start of the block, we didn't find it, but are done for this - // block. - return 0; -} - diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 8c25ad1..26f53c0 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -106,11 +106,12 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, // If AllowIdenticalEdges is true, then we allow this edge to be considered // non-critical iff all preds come from TI's block. while (I != E) { - if (*I != FirstPred) + const BasicBlock *P = *I; + if (P != FirstPred) return true; // Note: leave this as is until no one ever compiles with either gcc 4.0.1 // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 - E = pred_end(*I); + E = pred_end(P); ++I; } return false; @@ -277,11 +278,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); - I != E; ++I) - if (*I != NewBB) - OtherPreds.push_back(*I); + I != E; ++I) { + BasicBlock *P = *I; + if (P != NewBB) + OtherPreds.push_back(P); + } } - + bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? @@ -400,11 +403,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); - I != E; ++I) - if (TIL->contains(*I)) - Preds.push_back(*I); + I != E; ++I) { + BasicBlock *P = *I; + if (TIL->contains(P)) + Preds.push_back(P); else HasPredOutsideOfLoop = true; + } // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 767fa3a..7a9d007 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -69,6 +69,31 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, return CI; } +/// EmitStrNCmp - Emit a call to the strncmp function to the builder. 
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), + CastToCStr(Ptr2, B), Len, "strncmp"); + + if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, @@ -112,10 +137,10 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, bool isVolatile, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); + const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); return B.CreateCall5(MemCpy, Dst, Src, Len, ConstantInt::get(B.getInt32Ty(), Align), ConstantInt::get(B.getInt1Ty(), isVolatile)); @@ -395,11 +420,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1, false, B, TD); - replaceCall(CI->getOperand(1)); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -418,11 +443,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1, false, B, TD); - replaceCall(CI->getOperand(1)); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -436,12 +461,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, 
TD); - replaceCall(CI->getOperand(1)); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -462,8 +487,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. - if (isFoldable(3, 2, true)) { - Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD, + if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) { + Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, Name.substr(2, 6)); replaceCall(Ret); return true; @@ -479,10 +504,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { !FT->getParamType(2)->isIntegerTy() || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), B, TD, Name.substr(2, 7)); + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, Name.substr(2, 7)); replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 6d4fe4b..1dcfd57 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -32,7 +32,7 @@ using namespace llvm; // CloneBasicBlock - See comments in Cloning.h BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, ClonedCodeInfo *CodeInfo) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); @@ -47,7 +47,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[II] = NewInst; // Add instruction map to value. + VMap[II] = NewInst; // Add instruction map to value. hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { @@ -72,7 +72,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // ArgMap values. // void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -80,17 +80,17 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, #ifndef NDEBUG for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - assert(ValueMap.count(I) && "No mapping from source argument specified!"); + assert(VMap.count(I) && "No mapping from source argument specified!"); #endif // Clone any attributes. if (NewFunc->arg_size() == OldFunc->arg_size()) NewFunc->copyAttributesFrom(OldFunc); else { - //Some arguments were deleted with the ValueMap. Copy arguments one by one + //Some arguments were deleted with the VMap. 
Copy arguments one by one for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast<Argument>(ValueMap[I])) + if (Argument* Anew = dyn_cast<Argument>(VMap[I])) Anew->addAttr( OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() @@ -111,43 +111,43 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc, + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); - ValueMap[&BB] = CBB; // Add basic block mapping. + VMap[&BB] = CBB; // Add basic block mapping. if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator())) Returns.push_back(RI); } // Loop over all of the instructions in the function, fixing up operand - // references as we go. This uses ValueMap to do all the hard work. + // references as we go. This uses VMap to do all the hard work. // - for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]), + for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]), BE = NewFunc->end(); BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, ValueMap); + RemapInstruction(II, VMap); } /// CloneFunction - Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified -/// in the ValueMap are changed to refer to their mapped value instead of the -/// original one. If any of the arguments to the function are in the ValueMap, -/// the arguments are deleted from the resultant function. The ValueMap is +/// in the VMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the VMap, +/// the arguments are deleted from the resultant function. The VMap is /// updated to include mappings from all of the instructions and basicblocks in /// the function from their old to new values. /// Function *llvm::CloneFunction(const Function *F, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo) { std::vector<const Type*> ArgTypes; // The user might be deleting arguments to the function by specifying them in - // the ValueMap. If so, we need to not add the arguments to the arg ty vector + // the VMap. If so, we need to not add the arguments to the arg ty vector // for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet? + if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? ArgTypes.push_back(I->getType()); // Create a new function type... @@ -161,13 +161,13 @@ Function *llvm::CloneFunction(const Function *F, Function::arg_iterator DestI = NewF->arg_begin(); for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (ValueMap.count(I) == 0) { // Is this argument preserved? + if (VMap.count(I) == 0) { // Is this argument preserved? DestI->setName(I->getName()); // Copy the name over... - ValueMap[I] = DestI++; // Add mapping to ValueMap + VMap[I] = DestI++; // Add mapping to VMap } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. 
- CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo); + CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo); return NewF; } @@ -179,19 +179,19 @@ namespace { struct PruningFunctionCloner { Function *NewFunc; const Function *OldFunc; - DenseMap<const Value*, Value*> &ValueMap; + ValueToValueMapTy &VMap; SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - DenseMap<const Value*, Value*> &valueMap, + ValueToValueMapTy &valueMap, SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) - : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns), + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } @@ -202,7 +202,7 @@ namespace { public: /// ConstantFoldMappedInstruction - Constant fold the specified instruction, - /// mapping its operands through ValueMap if they are available. + /// mapping its operands through VMap if they are available. Constant *ConstantFoldMappedInstruction(const Instruction *I); }; } @@ -211,7 +211,7 @@ namespace { /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, std::vector<const BasicBlock*> &ToClone){ - Value *&BBEntry = ValueMap[BB]; + Value *&BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -230,7 +230,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If this instruction constant folds, don't bother cloning the instruction, // instead, just add the constant to the value map. if (Constant *C = ConstantFoldMappedInstruction(II)) { - ValueMap[II] = C; + VMap[II] = C; continue; } @@ -238,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[II] = NewInst; // Add instruction map to value. + VMap[II] = NewInst; // Add instruction map to value. hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { @@ -258,12 +258,12 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); // Or is a known constant in the caller... if (Cond == 0) - Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]); + Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]); // Constant fold to uncond branch! if (Cond) { BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); - ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } @@ -272,10 +272,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); if (Cond == 0) // Or known constant after constant prop in the callee... - Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]); + Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]); if (Cond) { // Constant fold to uncond branch! 
BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); - ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } @@ -286,7 +286,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (OldTI->hasName()) NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[OldTI] = NewInst; // Add instruction map to value. + VMap[OldTI] = NewInst; // Add instruction map to value. // Recursively clone any reachable successor blocks. const TerminatorInst *TI = BB->getTerminator(); @@ -307,13 +307,13 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } /// ConstantFoldMappedInstruction - Constant fold the specified instruction, -/// mapping its operands through ValueMap if they are available. +/// mapping its operands through VMap if they are available. Constant *PruningFunctionCloner:: ConstantFoldMappedInstruction(const Instruction *I) { SmallVector<Constant*, 8> Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - ValueMap))) + VMap))) Ops.push_back(Op); else return 0; // All operands not constant! @@ -363,7 +363,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap<const Value*, Value*> &ValueMap, + ValueToValueMapTy &VMap, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, @@ -374,10 +374,10 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) - assert(ValueMap.count(II) && "No mapping from source argument specified!"); + assert(VMap.count(II) && "No mapping from source argument specified!"); #endif - PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. @@ -397,14 +397,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { - BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); + BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand - // references as we go. This uses ValueMap to do all the hard work. + // references as we go. This uses VMap to do all the hard work. 
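The "hard work" the comment refers to is a single mechanical pass over every operand; condensed here from RemapInstruction (ValueMapper.cpp, further down in this patch):

  // Rewrite each operand of I to its clone, if one is recorded in VMap.
  // Globals and constants map to themselves, so they pass through untouched.
  static void remapOperands(Instruction *I, ValueToValueMapTy &VMap) {
    for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op)
      if (Value *Mapped = MapValue(*op, VMap))
        *op = Mapped;
  }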
// BasicBlock::iterator I = NewBB->begin(); @@ -455,7 +455,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, I->setMetadata(DbgKind, 0); } } - RemapInstruction(I, ValueMap); + RemapInstruction(I, VMap); } } @@ -465,19 +465,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); - BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]); + BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; - PHINode *PN = cast<PHINode>(ValueMap[OPN]); + PHINode *PN = cast<PHINode>(VMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { if (BasicBlock *MappedBlock = - cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { + cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) { Value *InVal = MapValue(PN->getIncomingValue(pred), - ValueMap); + VMap); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); @@ -531,15 +531,15 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); - assert(ValueMap[OldI] == PN && "ValueMap mismatch"); - ValueMap[OldI] = NV; + assert(VMap[OldI] == PN && "VMap mismatch"); + VMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } // NOTE: We cannot eliminate single entry phi nodes here, because of - // ValueMap. Single entry phi nodes can have multiple ValueMap entries - // pointing at them. Thus, deleting one would require scanning the ValueMap + // VMap. Single entry phi nodes can have multiple VMap entries + // pointing at them. Thus, deleting one would require scanning the VMap // to update any entries in it that would require that. This would be // really slow. } @@ -548,14 +548,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // and zap unconditional fall-through branches. This happen all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]); + Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); while (I != NewFunc->end()) { BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } // Note that we can't eliminate uncond branches if the destination has // single-entry PHI nodes. Eliminating the single-entry phi nodes would - // require scanning the ValueMap to update any entries that point to the phi + // require scanning the VMap to update any entries that point to the phi // node. 
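To make the scanning cost mentioned in these notes concrete, a hand-written illustration with hypothetical values (not from the patch): constant folding during cloning can collapse several old values onto one new node, so the map's values, unlike its keys, need not be unique.

  // Two source values end up mapped to the same cloned PHI:
  static void illustrateAliasedEntries(ValueToValueMapTy &VMap,
                                       const Value *OldPhi,
                                       const Value *OldFoldedCast,
                                       PHINode *ClonedPhi) {
    VMap[OldPhi] = ClonedPhi;        // the PHI's own mapping
    VMap[OldFoldedCast] = ClonedPhi; // a folded cast collapsed onto it
    // Erasing ClonedPhi would leave the second entry dangling unless the
    // whole map were scanned -- exactly why the cloner declines to
    // eliminate single-entry PHIs here.
  }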
BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp index 38928dc..551b630 100644 --- a/lib/Transforms/Utils/CloneLoop.cpp +++ b/lib/Transforms/Utils/CloneLoop.cpp @@ -15,7 +15,6 @@ #include "llvm/BasicBlock.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/ADT/DenseMap.h" using namespace llvm; @@ -23,13 +22,13 @@ using namespace llvm; /// CloneDominatorInfo - Clone basicblock's dominator tree and, if available, /// dominance info. It is expected that basic block is already cloned. static void CloneDominatorInfo(BasicBlock *BB, - DenseMap<const Value *, Value *> &ValueMap, + ValueMap<const Value *, Value *> &VMap, DominatorTree *DT, DominanceFrontier *DF) { assert (DT && "DominatorTree is not available"); - DenseMap<const Value *, Value*>::iterator BI = ValueMap.find(BB); - assert (BI != ValueMap.end() && "BasicBlock clone is missing"); + ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB); + assert (BI != VMap.end() && "BasicBlock clone is missing"); BasicBlock *NewBB = cast<BasicBlock>(BI->second); // NewBB already got dominator info. @@ -43,11 +42,11 @@ static void CloneDominatorInfo(BasicBlock *BB, // NewBB's dominator is either BB's dominator or BB's dominator's clone. BasicBlock *NewBBDom = BBDom; - DenseMap<const Value *, Value*>::iterator BBDomI = ValueMap.find(BBDom); - if (BBDomI != ValueMap.end()) { + ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom); + if (BBDomI != VMap.end()) { NewBBDom = cast<BasicBlock>(BBDomI->second); if (!DT->getNode(NewBBDom)) - CloneDominatorInfo(BBDom, ValueMap, DT, DF); + CloneDominatorInfo(BBDom, VMap, DT, DF); } DT->addNewBlock(NewBB, NewBBDom); @@ -60,8 +59,8 @@ static void CloneDominatorInfo(BasicBlock *BB, for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end(); I != E; ++I) { BasicBlock *DB = *I; - DenseMap<const Value*, Value*>::iterator IDM = ValueMap.find(DB); - if (IDM != ValueMap.end()) + ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB); + if (IDM != VMap.end()) NewDFSet.insert(cast<BasicBlock>(IDM->second)); else NewDFSet.insert(DB); @@ -71,10 +70,10 @@ static void CloneDominatorInfo(BasicBlock *BB, } } -/// CloneLoop - Clone Loop. Clone dominator info. Populate ValueMap +/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap /// using old blocks to new blocks mapping. 
Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, - DenseMap<const Value *, Value *> &ValueMap, Pass *P) { + ValueMap<const Value *, Value *> &VMap, Pass *P) { DominatorTree *DT = NULL; DominanceFrontier *DF = NULL; @@ -104,8 +103,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone"); - ValueMap[BB] = NewBB; + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone"); + VMap[BB] = NewBB; if (P) LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L); NewLoop->addBasicBlockToLoop(NewBB, LI->getBase()); @@ -117,7 +116,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - CloneDominatorInfo(BB, ValueMap, DT, DF); + CloneDominatorInfo(BB, VMap, DT, DF); } // Process sub loops @@ -125,7 +124,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, LoopNest.push_back(*I); } while (!LoopNest.empty()); - // Remap instructions to reference operands from ValueMap. + // Remap instructions to reference operands from VMap. for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(), NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) { BasicBlock *NB = *NBItr; @@ -135,8 +134,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (unsigned index = 0, num_ops = Insn->getNumOperands(); index != num_ops; ++index) { Value *Op = Insn->getOperand(index); - DenseMap<const Value *, Value *>::iterator OpItr = ValueMap.find(Op); - if (OpItr != ValueMap.end()) + ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op); + if (OpItr != VMap.end()) Insn->setOperand(index, OpItr->second); } } diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index b87c082..fc603d2 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -28,12 +28,12 @@ using namespace llvm; Module *llvm::CloneModule(const Module *M) { // Create the value map that maps things from the old module over to the new // module. - DenseMap<const Value*, Value*> ValueMap; - return CloneModule(M, ValueMap); + ValueToValueMapTy VMap; + return CloneModule(M, VMap); } Module *llvm::CloneModule(const Module *M, - DenseMap<const Value*, Value*> &ValueMap) { + ValueToValueMapTy &VMap) { // First off, we need to create the new module... Module *New = new Module(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); @@ -51,7 +51,7 @@ Module *llvm::CloneModule(const Module *M, New->addLibrary(*I); // Loop over all of the global variables, making corresponding globals in the - // new module. Here we add them to the ValueMap and to the new Module. We + // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. 
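The comment above describes the first half of a two-phase structure; a condensed sketch of the whole shape, mirroring this file's own code:

  static void cloneGlobalsTwoPhase(const Module *M, Module *New,
                                   ValueToValueMapTy &VMap) {
    // Phase 1: bare declarations only, so every global gets a VMap entry.
    for (Module::const_global_iterator I = M->global_begin(),
           E = M->global_end(); I != E; ++I)
      VMap[I] = new GlobalVariable(*New, I->getType()->getElementType(),
                                   false, GlobalValue::ExternalLinkage,
                                   0, I->getName());
    // Phase 2: initializers can now reference any global -- even
    // cyclically -- because MapValue resolves each one through VMap.
    for (Module::const_global_iterator I = M->global_begin(),
           E = M->global_end(); I != E; ++I)
      if (I->hasInitializer())
        cast<GlobalVariable>(VMap[I])->setInitializer(
          cast<Constant>(MapValue(I->getInitializer(), VMap)));
  }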
// for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); @@ -62,7 +62,7 @@ Module *llvm::CloneModule(const Module *M, GlobalValue::ExternalLinkage, 0, I->getName()); GV->setAlignment(I->getAlignment()); - ValueMap[I] = GV; + VMap[I] = GV; } // Loop over the functions in the module, making external functions as before @@ -71,13 +71,13 @@ Module *llvm::CloneModule(const Module *M, Function::Create(cast<FunctionType>(I->getType()->getElementType()), GlobalValue::ExternalLinkage, I->getName(), New); NF->copyAttributesFrom(I); - ValueMap[I] = NF; + VMap[I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) - ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, + VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, I->getName(), NULL, New); // Now that all of the things that global variable initializer can refer to @@ -86,10 +86,10 @@ Module *llvm::CloneModule(const Module *M, // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]); + GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); if (I->hasInitializer()) GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), - ValueMap))); + VMap))); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -98,17 +98,17 @@ Module *llvm::CloneModule(const Module *M, // Similarly, copy over function bodies now... // for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *F = cast<Function>(ValueMap[I]); + Function *F = cast<Function>(VMap[I]); if (!I->isDeclaration()) { Function::arg_iterator DestI = F->arg_begin(); for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); ++J) { DestI->setName(J->getName()); - ValueMap[J] = DestI++; + VMap[J] = DestI++; } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, ValueMap, Returns); + CloneFunctionInto(F, I, VMap, Returns); } F->setLinkage(I->getLinkage()); @@ -117,11 +117,37 @@ Module *llvm::CloneModule(const Module *M, // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]); + GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); GA->setLinkage(I->getLinkage()); if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast<Constant>(MapValue(C, ValueMap))); + GA->setAliasee(cast<Constant>(MapValue(C, VMap))); } - + + // And named metadata.... + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode &NMD = *I; + SmallVector<MDNode*, 4> MDs; + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap))); + NamedMDNode::Create(New->getContext(), NMD.getName(), + MDs.data(), MDs.size(), New); + } + + // Update metadata attach with instructions. 
+ for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI) + for (Function::iterator FI = MI->begin(), FE = MI->end(); + FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs; + BI->getAllMetadata(MDs); + for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator + MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) { + Value *MappedValue = MapValue(MDI->second, VMap); + if (MDI->second != MappedValue && MappedValue) + BI->setMetadata(MDI->first, cast<MDNode>(MappedValue)); + } + } return New; } diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index c908b4a..8e82a02 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -35,7 +35,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, I.eraseFromParent(); return 0; } - + // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { @@ -46,7 +46,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", F->getEntryBlock().begin()); } - + // Change all of the users of the instruction to read from the stack slot // instead. while (!I.use_empty()) { @@ -67,7 +67,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Value *&V = Loads[PN->getIncomingBlock(i)]; if (V == 0) { // Insert the load into the predecessor block - V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, + V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); } PN->setIncomingValue(i, V); @@ -110,8 +110,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, /// The phi node is deleted and it returns the pointer to the alloca inserted. AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { - P->eraseFromParent(); - return 0; + P->eraseFromParent(); + return 0; } // Create a stack slot to hold the value. @@ -124,23 +124,23 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", F->getEntryBlock().begin()); } - + // Iterate over each operand, insert store in each predecessor. for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) { - assert(II->getParent() != P->getIncomingBlock(i) && + assert(II->getParent() != P->getIncomingBlock(i) && "Invoke edge not supported yet"); II=II; } - new StoreInst(P->getIncomingValue(i), Slot, + new StoreInst(P->getIncomingValue(i), Slot, P->getIncomingBlock(i)->getTerminator()); } - + // Insert load in place of the phi and replace all uses. Value *V = new LoadInst(Slot, P->getName()+".reload", P); P->replaceAllUsesWith(V); - + // Delete phi. P->eraseFromParent(); - + return Slot; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 91390bc..598e7d2 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -63,7 +63,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Next, create the new invoke instruction, inserting it at the end // of the old basic block. 
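The first hunk below swaps raw operand arithmetic for CallSite's argument view, the same getOperand(N+1) -> getArgOperand(N) migration applied throughout this patch. The motivation, sketched (CI is a stand-in call instruction): argument indexing should not depend on where the callee sits in the raw operand list, which differs between call and invoke and was in flux at the time.

  static void walkArguments(CallInst *CI) {
    Value *Callee = CI->getCalledValue(); // position-independent callee
    (void)Callee;
    // A CallSite presents one argument range for both calls and invokes;
    // the callee and, for invokes, the destination blocks are excluded.
    ImmutableCallSite CS(CI);
    unsigned ArgNo = 0;
    for (ImmutableCallSite::arg_iterator I = CS.arg_begin(),
           E = CS.arg_end(); I != E; ++I, ++ArgNo)
      assert(*I == CI->getArgOperand(ArgNo) && "the two views agree");
  }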
- SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end()); + ImmutableCallSite CS(CI); + SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end()); InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, InvokeArgs.begin(), InvokeArgs.end(), @@ -169,7 +170,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, /// some edges of the callgraph may remain. static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, - DenseMap<const Value*, Value*> &ValueMap, + ValueMap<const Value*, Value*> &VMap, InlineFunctionInfo &IFI) { CallGraph &CG = *IFI.CG; const Function *Caller = CS.getInstruction()->getParent()->getParent(); @@ -192,9 +193,9 @@ static void UpdateCallGraphAfterInlining(CallSite CS, for (; I != E; ++I) { const Value *OrigCall = I->first; - DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall); + ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall); // Only copy the edge if the call was inlined! - if (VMI == ValueMap.end() || VMI->second == 0) + if (VMI == VMap.end() || VMI->second == 0) continue; // If the call was inlined, but then constant folded, there is no edge to @@ -285,8 +286,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; - { // Scope to destroy ValueMap after cloning. - DenseMap<const Value*, Value*> ValueMap; + { // Scope to destroy VMap after cloning. + ValueMap<const Value*, Value*> VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -351,16 +352,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Uses of the argument in the function should use our new alloca // instead. ActualArg = NewAlloca; + + // Calls that we inline may use the new alloca, so we need to clear + // their 'tail' flags. + MustClearTailCallFlags = true; } - ValueMap[I] = ActualArg; + VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. @@ -368,7 +373,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Update the callgraph if requested. 
if (IFI.CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI); + UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); } // If there are any alloca instructions in the block that used to be the entry diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index df6e603..e90c30b 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -190,14 +190,15 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { - BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(*UI)) + User *U = *UI; + BasicBlock *UserBB = cast<Instruction>(U)->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(U)) UserBB = PN->getIncomingBlock(UI); if (InstBB != UserBB && !inLoop(UserBB)) UsesToRewrite.push_back(&UI.getUse()); } - + // If there are no uses outside the loop, exit with no change. if (UsesToRewrite.empty()) return false; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d03f7a6..0b48a8f 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -35,111 +35,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// Local analysis. -// - -/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and -/// bitcasts to get back to the underlying object being addressed, keeping -/// track of the offset in bytes from the GEPs relative to the result. -/// This is closely related to Value::getUnderlyingObject but is located -/// here to avoid making VMCore depend on TargetData. -static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, - uint64_t &ByteOffset, - unsigned MaxLookup = 6) { - if (!V->getType()->isPointerTy()) - return V; - for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if (!GEP->hasAllConstantIndices()) - return V; - SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); - ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast<Operator>(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->mayBeOverridden()) - return V; - V = GA->getAliasee(); - } else { - return V; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } - return V; -} - -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. -bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const TargetData *TD) { - uint64_t ByteOffset = 0; - Value *Base = V; - if (TD) - Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); - - const Type *BaseType = 0; - unsigned BaseAlign = 0; - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { - // An alloca is safe to load from as load as it is suitably aligned. - BaseType = AI->getAllocatedType(); - BaseAlign = AI->getAlignment(); - } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) { - // Global variables are safe to load from but their size cannot be - // guaranteed if they are overridden. 
- if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) { - BaseType = GV->getType()->getElementType(); - BaseAlign = GV->getAlignment(); - } - } - - if (BaseType && BaseType->isSized()) { - if (TD && BaseAlign == 0) - BaseAlign = TD->getPrefTypeAlignment(BaseType); - - if (Align <= BaseAlign) { - if (!TD) - return true; // Loading directly from an alloca or global is OK. - - // Check if the load is within the bounds of the underlying object. - const PointerType *AddrTy = cast<PointerType>(V->getType()); - uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); - if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && - (Align == 0 || (ByteOffset % Align) == 0)) - return true; - } - } - - // Otherwise, be a little bit aggressive by scanning the local block where we - // want to check to see if the pointer is already being loaded or stored - // from/to. If so, the previous load or store would have already trapped, - // so there is no harm doing an extra load (also, CSE will later eliminate - // the load entirely). - BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); - - while (BBI != E) { - --BBI; - - // If we see a free or a call which may write to memory (i.e. which might do - // a free) the pointer could be marked invalid. - if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && - !isa<DbgInfoIntrinsic>(BBI)) - return false; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { - if (LI->getOperand(0) == V) return true; - } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { - if (SI->getOperand(1) == V) return true; - } - } - return false; -} - - -//===----------------------------------------------------------------------===// // Local constant propagation. // @@ -537,9 +432,11 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // Use that list to make another list of common predecessors of BB and Succ BlockSet CommonPreds; for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) - if (BBPreds.count(*PI)) - CommonPreds.insert(*PI); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (BBPreds.count(P)) + CommonPreds.insert(P); + } // Shortcut, if there are no common predecessors, merging is always safe if (CommonPreds.empty()) diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 1ef3c32..4f4edf3 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -142,9 +142,11 @@ ReprocessLoop: if (*BB == L->getHeader()) continue; SmallPtrSet<BasicBlock *, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI) - if (!L->contains(*PI)) - BadPreds.insert(*PI); + for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){ + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } // Delete each unique out-of-loop (and thus dead) predecessor. for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), @@ -192,7 +194,7 @@ ReprocessLoop: if (!Preheader) { Preheader = InsertPreheaderForLoop(L); if (Preheader) { - NumInserted++; + ++NumInserted; Changed = true; } } @@ -215,7 +217,7 @@ ReprocessLoop: // allowed. if (!L->contains(*PI)) { if (RewriteLoopExitBlock(L, ExitBlock)) { - NumInserted++; + ++NumInserted; Changed = true; } break; @@ -244,7 +246,7 @@ ReprocessLoop: // loop header. 
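The predecessor loops above and below all receive one mechanical change, shown here in miniature: dereference the iterator once into a named local instead of repeatedly through *PI (the NumInserted++ -> ++NumInserted tweaks are the same flavor of cleanup).

  static void collectOutOfLoopPreds(Loop *L, BasicBlock *BB,
                                    SmallPtrSet<BasicBlock*, 4> &BadPreds) {
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
         PI != PE; ++PI) {
      BasicBlock *P = *PI;       // single point of dereference
      if (!L->contains(P))
        BadPreds.insert(P);
    }
  }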
LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); if (LoopLatch) { - NumInserted++; + ++NumInserted; Changed = true; } } @@ -353,16 +355,18 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { // Compute the set of predecessors of the loop that are not in the loop. SmallVector<BasicBlock*, 8> OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) - if (!L->contains(*PI)) { // Coming in from outside the loop? + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { // Coming in from outside the loop? // If the loop is branched to from an indirect branch, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (isa<IndirectBrInst>((*PI)->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return 0; // Keep track of it. - OutsideBlocks.push_back(*PI); + OutsideBlocks.push_back(P); } + } // Split out the loop pre-header. BasicBlock *NewBB = @@ -385,13 +389,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { /// outside of the loop. BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { SmallVector<BasicBlock*, 8> LoopBlocks; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) - if (L->contains(*I)) { + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { + BasicBlock *P = *I; + if (L->contains(P)) { // Don't do this if the loop is exited via an indirect branch. - if (isa<IndirectBrInst>((*I)->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return 0; - LoopBlocks.push_back(*I); + LoopBlocks.push_back(P); } + } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], @@ -559,10 +565,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. std::set<BasicBlock*> BlocksInL; - for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) - if (DT->dominates(Header, *PI)) - AddBlockAndPredsToSet(*PI, Header, BlocksInL); - + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(Header, P)) + AddBlockAndPredsToSet(P, Header, BlocksInL); + } // Scan all of the loop children of L, moving them to OuterLoop if they are // not part of the inner loop. @@ -610,8 +617,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; - for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I) - if (*I != Preheader) BackedgeBlocks.push_back(*I); + for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ + BasicBlock *P = *I; + if (P != Preheader) BackedgeBlocks.push_back(P); + } // Create and insert the new backedge block... 
BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 84fd1eb..e0e07e7 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -37,13 +37,13 @@ STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); /// RemapInstruction - Convert the instruction operands from referencing the -/// current values into those specified by ValueMap. +/// current values into those specified by VMap. static inline void RemapInstruction(Instruction *I, - DenseMap<const Value *, Value*> &ValueMap) { + ValueMap<const Value *, Value*> &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); - DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op); - if (It != ValueMap.end()) + ValueMap<const Value *, Value*>::iterator It = VMap.find(Op); + if (It != VMap.end()) I->setOperand(op, It->second); } } @@ -183,7 +183,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. - typedef DenseMap<const Value*, Value*> ValueToValueMapTy; + typedef ValueMap<const Value*, Value*> ValueToValueMapTy; ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { @@ -205,26 +205,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { - ValueToValueMapTy ValueMap; - BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It)); + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]); + PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; - ValueMap[OrigPHINode[i]] = InVal; + VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; - for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 0ed8c72..2696e69 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -45,6 +45,7 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" @@ -62,10 +63,7 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", namespace { class LowerInvoke : public FunctionPass { // Used for both models. 
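How the unroller hunks above chain their maps, condensed into one sketch: each iteration clones from the original body into a fresh VMap, then folds that map into LastValueMap so the next iteration resolves loop-carried values to the newest clone.

  static BasicBlock *cloneOneIteration(BasicBlock *BB, unsigned It,
                                       ValueToValueMapTy &LastValueMap) {
    ValueToValueMapTy VMap;                 // this iteration's clones only
    BasicBlock *New = CloneBasicBlock(BB, VMap, "." + Twine(It));
    LastValueMap[BB] = New;
    for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
         VI != VE; ++VI)
      LastValueMap[VI->first] = VI->second; // newest clone wins
    return New;
  }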
- Constant *WriteFn; Constant *AbortFn; - Value *AbortMessage; - unsigned AbortMessageLength; // Used for expensive EH support. const Type *JBLinkTy; @@ -92,10 +90,8 @@ namespace { } private: - void createAbortMessage(Module *M); - void writeAbortMessage(Instruction *IB); bool insertCheapEHSupport(Function &F); - void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes); + void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes); void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, AllocaInst *InvokeNum, AllocaInst *StackPtr, SwitchInst *CatchSwitch); @@ -123,7 +119,6 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, bool LowerInvoke::doInitialization(Module &M) { const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - AbortMessage = 0; if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; @@ -175,68 +170,14 @@ bool LowerInvoke::doInitialization(Module &M) { // We need the 'write' and 'abort' functions for both models. AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), (Type *)0); -#if 0 // "write" is Unix-specific.. code is going away soon anyway. - WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty, - VoidPtrTy, Type::Int32Ty, (Type *)0); -#else - WriteFn = 0; -#endif return true; } -void LowerInvoke::createAbortMessage(Module *M) { - if (useExpensiveEHSupport) { - // The abort message for expensive EH support tells the user that the - // program 'unwound' without an 'invoke' instruction. - Constant *Msg = - ConstantArray::get(M->getContext(), - "ERROR: Exception thrown, but not caught!\n"); - AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - - GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, - GlobalValue::InternalLinkage, - Msg, "abortmsg"); - std::vector<Constant*> GEPIdx(2, - Constant::getNullValue(Type::getInt32Ty(M->getContext()))); - AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); - } else { - // The abort message for cheap EH support tells the user that EH is not - // enabled. - Constant *Msg = - ConstantArray::get(M->getContext(), - "Exception handler needed, but not enabled." - "Recompile program with -enable-correct-eh-support.\n"); - AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - - GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, - GlobalValue::InternalLinkage, - Msg, "abortmsg"); - std::vector<Constant*> GEPIdx(2, Constant::getNullValue( - Type::getInt32Ty(M->getContext()))); - AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); - } -} - - -void LowerInvoke::writeAbortMessage(Instruction *IB) { -#if 0 - if (AbortMessage == 0) - createAbortMessage(IB->getParent()->getParent()->getParent()); - - // These are the arguments we WANT... - Value* Args[3]; - Args[0] = ConstantInt::get(Type::Int32Ty, 2); - Args[1] = AbortMessage; - Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength); - (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall(); -#endif -} - bool LowerInvoke::insertCheapEHSupport(Function &F) { bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); + SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... 
CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), @@ -257,9 +198,6 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { ++NumInvokes; Changed = true; } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - // Insert a new call to write(2, AbortMessage, AbortMessageLength); - writeAbortMessage(UI); - // Insert a call to abort() CallInst::Create(AbortFn, "", UI)->setTailCall(); @@ -320,7 +258,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); // Insert a normal call instruction. - std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); + SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), "", II); @@ -349,7 +287,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { // across the unwind edge. This process also splits all critical edges // coming out of invoke's. void LowerInvoke:: -splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { +splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; @@ -371,16 +309,33 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - // This is always a no-op cast because we're casting AI to AI->getType() so - // src and destination types are identical. BitCast is the only possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Normally its is forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, we're - // replacing it here with the same value it was constructed with to simply - // make NC its user. - NC->setOperand(0, AI); + const Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } } // Finally, scan the code looking for instructions with bad live ranges. 
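The bitcast branch above hinges on a deliberately self-referential no-op, isolated here as a sketch: give a value a same-typed stand-in user, redirect every real use to it, then point the stand-in back at the original.

  static CastInst *makeNoOpStandIn(Value *V, Instruction *InsertPt) {
    CastInst *NC = new BitCastInst(V, V->getType(), V->getName() + ".tmp",
                                   InsertPt);
    V->replaceAllUsesWith(NC); // this also rewrote NC's own operand...
    NC->setOperand(0, V);      // ...so restore it; NC is now V's only user
    return NC;
  }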
@@ -402,7 +357,7 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { continue; // Avoid iterator invalidation by copying users to a temporary vector. - std::vector<Instruction*> Users; + SmallVector<Instruction*,16> Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); @@ -452,9 +407,9 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) { } bool LowerInvoke::insertExpensiveEHSupport(Function &F) { - std::vector<ReturnInst*> Returns; - std::vector<UnwindInst*> Unwinds; - std::vector<InvokeInst*> Invokes; + SmallVector<ReturnInst*,16> Returns; + SmallVector<UnwindInst*,16> Unwinds; + SmallVector<InvokeInst*,16> Invokes; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { @@ -502,12 +457,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); - std::vector<Value*> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); - Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1)); - OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], "OldBuf", - EntryBB->getTerminator()); + EntryBB->getTerminator()); // Copy the JBListHead to the alloca. Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, @@ -552,7 +506,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { "setjmp.cont"); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); - Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), + Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], "TheJmpBuf", EntryBB->getTerminator()); JmpBufPtr = new BitCastInst(JmpBufPtr, @@ -605,24 +559,20 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create the block to do the longjmp. // Get a pointer to the jmpbuf and longjmp. - std::vector<Value*> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); - Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)); - Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf", + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; + Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf", UnwindBlock); Idx[0] = new BitCastInst(Idx[0], Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock); + CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock); new UnreachableInst(F.getContext(), UnwindBlock); // Set up the term block ("throw without a catch"). 
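The Idx rewrites above all adopt one small idiom, shown in isolation (Ctx, Ptr, and InsertPt are stand-ins): a fixed-size stack array with a [&Idx[0], &Idx[2]) pointer range replaces a heap-allocating std::vector for two constant GEP indices.

  static Value *gepConstantPair(LLVMContext &Ctx, Value *Ptr,
                                Instruction *InsertPt) {
    Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(Ctx)),
                     ConstantInt::get(Type::getInt32Ty(Ctx), 1) };
    return GetElementPtrInst::Create(Ptr, &Idx[0], &Idx[2], "gep", InsertPt);
  }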
new UnreachableInst(F.getContext(), TermBlock); - // Insert a new call to write(2, AbortMessage, AbortMessageLength); - writeAbortMessage(TermBlock->getTerminator()); - // Insert a call to abort() CallInst::Create(AbortFn, "", TermBlock->getTerminator())->setTailCall(); diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 13f0a28..c0de193 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -69,11 +69,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { // Only allow direct and non-volatile loads and stores... for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) // Loop over all of the uses of the alloca - if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + UI != UE; ++UI) { // Loop over all of the uses of the alloca + const User *U = *UI; + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { if (LI->isVolatile()) return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == AI) return false; // Don't allow a store OF the AI, only INTO the AI. if (SI->isVolatile()) @@ -81,6 +82,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { } else { return false; } + } return true; } @@ -603,9 +605,8 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. - SmallVector<BasicBlock*, 64> LiveInBlockWorklist; - LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), - Info.UsingBlocks.begin(), Info.UsingBlocks.end()); + SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); // If any of the using blocks is also a definition block, check to see if the // definition occurs before or after the use. If it happens before the use, @@ -897,6 +898,9 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, // Propagate any debug metadata from the store onto the dbg.value. if (MDNode *SIMD = SI->getMetadata("dbg")) DbgVal->setMetadata("dbg", SIMD); + // Otherwise propagate debug metadata from dbg.declare. + else if (MDNode *MD = DDI->getMetadata("dbg")) + DbgVal->setMetadata("dbg", MD); } // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 9f2209d..fd3ed3e 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1513,17 +1513,19 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. 
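Restating the isAllocaPromotable loop above as a standalone predicate: an alloca is promotable to a register only when every user is a direct, non-volatile load, or a non-volatile store into it; anything else -- including a store of the alloca's own address -- disqualifies it.

  static bool isPromotableSketch(const AllocaInst *AI) {
    for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
         UI != UE; ++UI) {
      const User *U = *UI;
      if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
        if (LI->isVolatile()) return false;
      } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
        if (SI->getOperand(0) == AI) return false; // address would escape
        if (SI->isVolatile()) return false;
      } else {
        return false; // GEPs, casts, calls, etc. all block promotion
      }
    }
    return true;
  }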
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if ((PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) && + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; + if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && PBI->isConditional() && PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), - CondIsTrue), *PI); + CondIsTrue), P); } else { - NewPN->addIncoming(BI->getCondition(), *PI); + NewPN->addIncoming(BI->getCondition(), P); } + } BI->setCondition(NewPN); return true; @@ -1697,10 +1699,11 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { SmallVector<BasicBlock*, 8> UncondBranchPreds; SmallVector<BranchInst*, 8> CondBranchPreds; for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - TerminatorInst *PTI = (*PI)->getTerminator(); + BasicBlock *P = *PI; + TerminatorInst *PTI = P->getTerminator(); if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) { if (BI->isUnconditional()) - UncondBranchPreds.push_back(*PI); + UncondBranchPreds.push_back(P); else CondBranchPreds.push_back(BI); } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 87ce631..3f6a90c 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -28,7 +28,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { // DenseMap. This includes any recursive calls to MapValue. // Global values and non-function-local metadata do not need to be seeded into - // the ValueMap if they are using the identity mapping. + // the VM if they are using the identity mapping. if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) || (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal())) return VMSlot = const_cast<Value*>(V); @@ -45,7 +45,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { if (isa<ConstantInt>(C) || isa<ConstantFP>(C) || isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) || - isa<UndefValue>(C) || isa<MDString>(C)) + isa<UndefValue>(C)) return VMSlot = C; // Primitive constants map directly if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { @@ -125,11 +125,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { } /// RemapInstruction - Convert the instruction operands from referencing the -/// current values into those specified by ValueMap. +/// current values into those specified by VMap. 
/// -void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) { +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) { for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, ValueMap); + Value *V = MapValue(*op, VMap); assert(V && "Referenced value not in value map!"); *op = V; } diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h index d61c24c..f4ff643 100644 --- a/lib/Transforms/Utils/ValueMapper.h +++ b/lib/Transforms/Utils/ValueMapper.h @@ -15,12 +15,12 @@ #ifndef VALUEMAPPER_H #define VALUEMAPPER_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ValueMap.h" namespace llvm { class Value; class Instruction; - typedef DenseMap<const Value *, Value *> ValueToValueMapTy; + typedef ValueMap<const Value *, Value *> ValueToValueMapTy; Value *MapValue(const Value *V, ValueToValueMapTy &VM); void RemapInstruction(Instruction *I, ValueToValueMapTy &VM); diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index e48c026..7a471ef 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -70,8 +70,7 @@ static const Module *getModuleFromVal(const Value *V) { // PrintEscapedString - Print each character of the specified string, escaping // it if it is not printable or if it is an escape char. -static void PrintEscapedString(const StringRef &Name, - raw_ostream &Out) { +static void PrintEscapedString(StringRef Name, raw_ostream &Out) { for (unsigned i = 0, e = Name.size(); i != e; ++i) { unsigned char C = Name[i]; if (isprint(C) && C != '\\' && C != '"') @@ -1419,6 +1418,9 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, case GlobalValue::ExternalLinkage: break; case GlobalValue::PrivateLinkage: Out << "private "; break; case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break; + case GlobalValue::LinkerPrivateWeakLinkage: + Out << "linker_private_weak "; + break; case GlobalValue::InternalLinkage: Out << "internal "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; @@ -1469,8 +1471,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { writeOperand(GV->getInitializer(), false); } - if (GV->hasSection()) - Out << ", section \"" << GV->getSection() << '"'; + if (GV->hasSection()) { + Out << ", section \""; + PrintEscapedString(GV->getSection(), Out); + Out << '"'; + } if (GV->getAlignment()) Out << ", align " << GV->getAlignment(); @@ -1628,8 +1633,11 @@ void AssemblyWriter::printFunction(const Function *F) { Attributes FnAttrs = Attrs.getFnAttributes(); if (FnAttrs != Attribute::None) Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes()); - if (F->hasSection()) - Out << " section \"" << F->getSection() << '"'; + if (F->hasSection()) { + Out << " section \""; + PrintEscapedString(F->getSection(), Out); + Out << '"'; + } if (F->getAlignment()) Out << " align " << F->getAlignment(); if (F->hasGC()) @@ -1854,6 +1862,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { default: Out << " cc" << CI->getCallingConv(); break; } + Operand = CI->getCalledValue(); const PointerType *PTy = cast<PointerType>(Operand->getType()); const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); const Type *RetTy = FTy->getReturnType(); @@ -1877,10 +1886,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(Operand, true); } Out << '('; - for (unsigned op = 1, Eop = I.getNumOperands(); op < Eop; 
++op) { - if (op > 1) + for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) { + if (op > 0) Out << ", "; - writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op)); + writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1)); } Out << ')'; if (PAL.getFnAttributes() != Attribute::None) @@ -1925,10 +1934,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(Operand, true); } Out << '('; - for (unsigned op = 0, Eop = I.getNumOperands() - 3; op < Eop; ++op) { + for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) { if (op) Out << ", "; - writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op + 1)); + writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1)); } Out << ')'; @@ -2027,7 +2036,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } static void WriteMDNodeComment(const MDNode *Node, - formatted_raw_ostream &Out) { + formatted_raw_ostream &Out) { if (Node->getNumOperands() < 1) return; ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0)); diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 0144210..dc39024 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -18,6 +18,7 @@ #include "llvm/Module.h" #include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" #include <cstring> @@ -314,7 +315,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Function *F = CI->getCalledFunction(); LLVMContext &C = CI->getContext(); - + ImmutableCallSite CS(CI); + assert(F && "CallInst has no function associated with it."); if (!NewFn) { @@ -344,11 +346,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD || isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) { std::vector<Constant*> Idxs; - Value *Op0 = CI->getOperand(1); + Value *Op0 = CI->getArgOperand(0); ShuffleVectorInst *SI = NULL; if (isLoadH || isLoadL) { Value *Op1 = UndefValue::get(Op0->getType()); - Value *Addr = new BitCastInst(CI->getOperand(2), + Value *Addr = new BitCastInst(CI->getArgOperand(1), Type::getDoublePtrTy(C), "upgraded.", CI); Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI); @@ -381,7 +383,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI); } else if (isMovSD || isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) { - Value *Op1 = CI->getOperand(2); + Value *Op1 = CI->getArgOperand(1); if (isMovSD) { Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2)); Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); @@ -395,8 +397,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *Mask = ConstantVector::get(Idxs); SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI); } else if (isShufPD) { - Value *Op1 = CI->getOperand(2); - unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue(); + Value *Op1 = CI->getArgOperand(1); + unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1)); Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), ((MaskVal >> 1) & 1)+2)); @@ -416,8 +418,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 0144210..dc39024 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Module.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/IRBuilder.h"
 #include <cstring>
@@ -314,7 +315,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   Function *F = CI->getCalledFunction();
   LLVMContext &C = CI->getContext();
-
+  ImmutableCallSite CS(CI);
+
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
@@ -344,11 +346,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
         isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
       std::vector<Constant*> Idxs;
-      Value *Op0 = CI->getOperand(1);
+      Value *Op0 = CI->getArgOperand(0);
       ShuffleVectorInst *SI = NULL;
       if (isLoadH || isLoadL) {
         Value *Op1 = UndefValue::get(Op0->getType());
-        Value *Addr = new BitCastInst(CI->getOperand(2),
+        Value *Addr = new BitCastInst(CI->getArgOperand(1),
                                   Type::getDoublePtrTy(C),
                                   "upgraded.", CI);
         Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
@@ -381,7 +383,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
       } else if (isMovSD ||
                  isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-        Value *Op1 = CI->getOperand(2);
+        Value *Op1 = CI->getArgOperand(1);
         if (isMovSD) {
           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
@@ -395,8 +397,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         Value *Mask = ConstantVector::get(Idxs);
         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
       } else if (isShufPD) {
-        Value *Op1 = CI->getOperand(2);
-        unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+        Value *Op1 = CI->getArgOperand(1);
+        unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
                                         ((MaskVal >> 1) & 1)+2));
@@ -416,8 +418,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.sse41.pmulld") {
       // Upgrade this set of intrinsics into vector multiplies.
-      Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
-                                                   CI->getOperand(2),
+      Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
+                                                   CI->getArgOperand(1),
                                                    CI->getName(),
                                                    CI);
       // Fix up all the uses with our new multiply.
@@ -427,9 +429,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       // Remove upgraded multiply.
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
-      Value *Op1 = CI->getOperand(1);
-      Value *Op2 = CI->getOperand(2);
-      Value *Op3 = CI->getOperand(3);
+      Value *Op1 = CI->getArgOperand(0);
+      Value *Op2 = CI->getArgOperand(1);
+      Value *Op3 = CI->getArgOperand(2);
       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
       Value *Rep;
       IRBuilder<> Builder(C);
@@ -483,9 +485,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
 
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
-      Value *Op1 = CI->getOperand(1);
-      Value *Op2 = CI->getOperand(2);
-      Value *Op3 = CI->getOperand(3);
+      Value *Op1 = CI->getArgOperand(0);
+      Value *Op2 = CI->getArgOperand(1);
+      Value *Op3 = CI->getArgOperand(2);
       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
       Value *Rep;
       IRBuilder<> Builder(C);
@@ -556,10 +558,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::x86_mmx_psrl_w: {
     Value *Operands[2];
 
-    Operands[0] = CI->getOperand(1);
+    Operands[0] = CI->getArgOperand(0);
 
     // Cast the second parameter to the correct type.
-    BitCastInst *BC = new BitCastInst(CI->getOperand(2),
+    BitCastInst *BC = new BitCastInst(CI->getArgOperand(1),
                                       NewFn->getFunctionType()->getParamType(1),
                                       "upgraded.", CI);
     Operands[1] = BC;
@@ -583,9 +585,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::ctlz:
   case Intrinsic::ctpop:
   case Intrinsic::cttz: {
-    //  Build a small vector of the 1..(N-1) operands, which are the
-    //  parameters.
-    SmallVector<Value*, 8> Operands(CI->op_begin()+1, CI->op_end());
+    //  Build a small vector of the original arguments.
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
 
     // Construct a new CallInst
     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
@@ -620,7 +621,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::eh_selector:
   case Intrinsic::eh_typeid_for: {
     // Only the return type changed.
-    SmallVector<Value*, 8> Operands(CI->op_begin() + 1, CI->op_end());
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
                                        "upgraded." + CI->getName(), CI);
     NewCI->setTailCall(CI->isTailCall());
@@ -643,8 +644,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::memset: {
     // Add isVolatile
     const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
-    Value *Operands[5] = { CI->getOperand(1), CI->getOperand(2),
-                           CI->getOperand(3), CI->getOperand(4),
+    Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
+                           CI->getArgOperand(2), CI->getArgOperand(3),
                            llvm::ConstantInt::get(I1Ty, 0) };
     CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
                                        CI->getName(), CI);
@@ -726,7 +727,8 @@ void llvm::CheckDebugInfoIntrinsics(Module *M) {
   if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
     if (!Declare->use_empty()) {
       DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
-      if (!isa<MDNode>(DDI->getOperand(1)) ||!isa<MDNode>(DDI->getOperand(2))) {
+      if (!isa<MDNode>(DDI->getArgOperand(0)) ||
+          !isa<MDNode>(DDI->getArgOperand(1))) {
         while (!Declare->use_empty()) {
           CallInst *CI = cast<CallInst>(Declare->use_back());
           CI->eraseFromParent();
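The upgrade paths above move from CI->getOperand(i+1) to CI->getArgOperand(i), and to CS.arg_begin()/CS.arg_end() when a whole argument range is needed. A small usage sketch along the same lines (the helper name is ours):

    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Copy the arguments of a call, and only the arguments: the call-site
    // iterator range excludes the callee regardless of operand layout.
    static void collectArgs(CallInst *CI, SmallVectorImpl<Value*> &Args) {
      ImmutableCallSite CS(CI);
      Args.append(CS.arg_begin(), CS.arg_end());
    }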
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 549977c..3567266 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -658,7 +658,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
         }
       }
       // Handle an offsetof-like expression.
-      if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()){
+      if (Ty->isStructTy() || Ty->isArrayTy()) {
         if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
                                             DestTy, false))
           return C;
@@ -1817,8 +1817,15 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
       return Constant::getAllOnesValue(ResultTy);
 
   // Handle some degenerate cases first
-  if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
+  if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+    // For EQ and NE, we can always pick a value for the undef to make the
+    // predicate pass or fail, so we can return undef.
+    if (ICmpInst::isEquality(ICmpInst::Predicate(pred)))
+      return UndefValue::get(ResultTy);
+    // Otherwise, pick the same value as the non-undef operand, and fold
+    // it to true or false.
     return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+  }
 
   // No compile-time operations on this type yet.
   if (C1->getType()->isPPC_FP128Ty())
@@ -2194,7 +2201,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
       }
 
       NewIndices.push_back(Combined);
-      NewIndices.insert(NewIndices.end(), Idxs+1, Idxs+NumIdx);
+      NewIndices.append(Idxs+1, Idxs+NumIdx);
       return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
         ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
                                                &NewIndices[0],
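The new undef-compare branch above is self-contained enough to restate on its own. A sketch of the same fold, mirroring the hunk (the helper name is ours):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // icmp eq/ne with an undef operand folds to undef, since undef may be
    // chosen to make the comparison either succeed or fail; relational
    // predicates keep the older "true when equal" constant fold.
    static Constant *foldCmpWithUndef(CmpInst::Predicate Pred,
                                      const Type *ResultTy) {
      if (ICmpInst::isEquality(Pred))
        return UndefValue::get(ResultTy);
      return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(Pred));
    }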
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index bbf1375..ca1a399 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1058,6 +1058,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
     return LLVMPrivateLinkage;
   case GlobalValue::LinkerPrivateLinkage:
     return LLVMLinkerPrivateLinkage;
+  case GlobalValue::LinkerPrivateWeakLinkage:
+    return LLVMLinkerPrivateWeakLinkage;
   case GlobalValue::DLLImportLinkage:
     return LLVMDLLImportLinkage;
   case GlobalValue::DLLExportLinkage:
@@ -1108,6 +1110,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
   case LLVMLinkerPrivateLinkage:
     GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
     break;
+  case LLVMLinkerPrivateWeakLinkage:
+    GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+    break;
   case LLVMDLLImportLinkage:
     GV->setLinkage(GlobalValue::DLLImportLinkage);
     break;
@@ -2205,15 +2210,14 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
 
 LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
                                          char **OutMessage) {
-  MemoryBuffer *MB = MemoryBuffer::getSTDIN();
-  if (!MB->getBufferSize()) {
-    delete MB;
-    *OutMessage = strdup("stdin is empty.");
-    return 1;
+  std::string Error;
+  if (MemoryBuffer *MB = MemoryBuffer::getSTDIN(&Error)) {
+    *OutMemBuf = wrap(MB);
+    return 0;
   }
 
-  *OutMemBuf = wrap(MB);
-  return 0;
+  *OutMessage = strdup(Error.c_str());
+  return 1;
 }
 
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index a37fe07..9792ada 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -286,9 +286,10 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
   for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
     // PHI nodes use values in the corresponding predecessor block.  For other
     // instructions, just check to see whether the parent of the use matches up.
-    const PHINode *PN = dyn_cast<PHINode>(*UI);
+    const User *U = *UI;
+    const PHINode *PN = dyn_cast<PHINode>(U);
     if (PN == 0) {
-      if (cast<Instruction>(*UI)->getParent() != BB)
+      if (cast<Instruction>(U)->getParent() != BB)
         return true;
       continue;
     }
@@ -401,12 +402,20 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
       return false;
     // Note that it is not safe to speculate into a malloc'd region because
    // malloc may return null.
+    // It's also not safe to follow a bitcast, for example:
+    //   bitcast i8* (alloca i8) to i32*
+    //   would result in a 4-byte load from a 1-byte alloca.
+    Value *Op0 = getOperand(0);
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) {
+      // TODO: it's safe to do this for any GEP with constant indices that
+      // compute inside the allocated type, but not for any inbounds gep.
+      if (GEP->hasAllZeroIndices())
+        Op0 = GEP->getPointerOperand();
+    }
-    if (isa<AllocaInst>(getOperand(0)))
+    if (isa<AllocaInst>(Op0))
       return true;
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
       return !GV->hasExternalWeakLinkage();
-    // FIXME: Handle cases involving GEPs.  We have to be careful because
-    // a load of a out-of-bounds GEP has undefined behavior.
     return false;
   }
   case Call:
@@ -421,6 +430,7 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
   case Store:
   case Ret:
   case Br:
+  case IndirectBr:
   case Switch:
   case Unwind:
   case Unreachable:
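The speculation change above is the interesting one in this file: a load may only be hoisted past a GEP when the GEP provably stays at the start of the object; all-zero-index GEPs (the bitcast-like case) qualify, anything else could widen or move the access out of bounds. The screen, restated in isolation (illustrative only, not the author's code):

    #include "llvm/Instructions.h"
    #include "llvm/Operator.h"
    using namespace llvm;

    // True when P is an alloca, possibly behind a GEP whose indices are all
    // zero. A non-zero-index GEP is rejected: speculating a load through it
    // could read past the end of the allocation.
    static bool basePointerIsAlloca(Value *P) {
      if (GEPOperator *GEP = dyn_cast<GEPOperator>(P))
        if (GEP->hasAllZeroIndices())
          P = GEP->getPointerOperand();
      return isa<AllocaInst>(P);
    }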
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index f64b220..c13696f 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -33,7 +33,9 @@ using namespace llvm;
 User::op_iterator CallSite::getCallee() const {
   Instruction *II(getInstruction());
   return isCall()
-    ? cast<CallInst>(II)->op_begin()
+    ? (CallInst::ArgOffset
+       ? cast</*FIXME: CallInst*/User>(II)->op_begin()
+       : cast</*FIXME: CallInst*/User>(II)->op_end() - 1)
     : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function
 }
 
@@ -231,8 +233,7 @@ CallInst::~CallInst() {
 
 void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
   assert(NumOperands == NumParams+1 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
+  Op<ArgOffset -1>() = Func;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -245,16 +246,15 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
     assert((i >= FTy->getNumParams() ||
             FTy->getParamType(i) == Params[i]->getType()) &&
           "Calling a function with a bad signature!");
-    OL[i+1] = Params[i];
+    OperandList[i + ArgOffset] = Params[i];
   }
 }
 
 void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
   assert(NumOperands == 3 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
-  OL[1] = Actual1;
-  OL[2] = Actual2;
+  Op<ArgOffset -1>() = Func;
+  Op<ArgOffset + 0>() = Actual1;
+  Op<ArgOffset + 1>() = Actual2;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -273,9 +273,8 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
 
 void CallInst::init(Value *Func, Value *Actual) {
   assert(NumOperands == 2 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
-  OL[1] = Actual;
+  Op<ArgOffset -1>() = Func;
+  Op<ArgOffset + 0>() = Actual;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -291,8 +290,7 @@ void CallInst::init(Value *Func, Value *Actual) {
 
 void CallInst::init(Value *Func) {
   assert(NumOperands == 1 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
+  Op<ArgOffset -1>() = Func;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -473,9 +471,10 @@ static Instruction *createMalloc(Instruction *InsertBefore,
 Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
                                     const Type *IntPtrTy, const Type *AllocTy,
                                     Value *AllocSize, Value *ArraySize,
+                                    Function * MallocF,
                                     const Twine &Name) {
   return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
-                      ArraySize, NULL, Name);
+                      ArraySize, MallocF, Name);
 }
 
 /// CreateMalloc - Generate the IR for a call to malloc:
@@ -527,8 +526,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
 }
 
 /// CreateFree - Generate the IR for a call to the builtin free function.
-void CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
-  createFree(Source, InsertBefore, NULL);
+Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+  return createFree(Source, InsertBefore, NULL);
 }
 
 /// CreateFree - Generate the IR for a call to the builtin free function.
@@ -828,8 +827,8 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
   else {
     assert(!isa<BasicBlock>(Amt) &&
           "Passed basic block into allocation size parameter! Use other ctor");
-    assert(Amt->getType()->isIntegerTy(32) &&
-           "Allocation array size is not a 32-bit integer!");
+    assert(Amt->getType()->isIntegerTy() &&
+           "Allocation array size is not an integer!");
  }
  return Amt;
 }
@@ -1456,7 +1455,7 @@ void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
   Op<0>() = Agg;
   Op<1>() = Val;
 
-  Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+  Indices.append(Idx, Idx + NumIdx);
   setName(Name);
 }
 
@@ -1509,7 +1508,7 @@ void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx,
                             const Twine &Name) {
   assert(NumOperands == 1 && "NumOperands not initialized?");
 
-  Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+  Indices.append(Idx, Idx + NumIdx);
   setName(Name);
 }
 
@@ -1911,9 +1910,12 @@ bool CastInst::isLosslessCast() const {
 /// # bitcast i32* %x to i8*
 /// # bitcast <2 x i32> %x to <4 x i16>
 /// # ptrtoint i32* %x to i32     ; on 32-bit platforms only
-/// @brief Determine if a cast is a no-op.
-bool CastInst::isNoopCast(const Type *IntPtrTy) const {
-  switch (getOpcode()) {
+/// @brief Determine if the described cast is a no-op.
+bool CastInst::isNoopCast(Instruction::CastOps Opcode,
+                          const Type *SrcTy,
+                          const Type *DestTy,
+                          const Type *IntPtrTy) {
+  switch (Opcode) {
     default:
       assert(!"Invalid CastOp");
     case Instruction::Trunc:
@@ -1930,13 +1932,18 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const {
       return true;  // BitCast never modifies bits.
     case Instruction::PtrToInt:
       return IntPtrTy->getScalarSizeInBits() ==
-             getType()->getScalarSizeInBits();
+             DestTy->getScalarSizeInBits();
     case Instruction::IntToPtr:
       return IntPtrTy->getScalarSizeInBits() ==
-             getOperand(0)->getType()->getScalarSizeInBits();
+             SrcTy->getScalarSizeInBits();
   }
 }
 
+/// @brief Determine if a cast is a no-op.
+bool CastInst::isNoopCast(const Type *IntPtrTy) const {
+  return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
+}
+
 /// This function determines if a pair of casts can be eliminated and what
 /// opcode should be used in the elimination. This assumes that there are two
 /// instructions like this:
@@ -1999,6 +2006,14 @@ unsigned CastInst::isEliminableCastPair(
     { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr      |
     {  5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast      -+
   };
+
+  // If either of the casts are a bitcast from scalar to vector, disallow the
+  // merging.
+  if ((firstOp == Instruction::BitCast &&
+       isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+      (secondOp == Instruction::BitCast &&
+       isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+    return 0; // Disallowed
 
   int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
                             [secondOp-Instruction::CastOpsBegin];
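The ArgOffset arithmetic above encodes a transitional operand layout for CallInst: with ArgOffset == 1 the callee is operand 0 and argument i is operand i + 1, while an ArgOffset == 0 build stores arguments first and the callee in the final slot (Op<ArgOffset - 1>() then wraps around to the end). Assuming ArgOffset remains visible to clients, the argument accessor reduces to this sketch:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Spelled-out equivalent of CI->getArgOperand(i) under either layout;
    // real code should use the accessor and never index operands directly.
    static Value *argOperand(CallInst *CI, unsigned i) {
      return CI->getOperand(i + CallInst::ArgOffset);
    }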
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
index c37d5b0..ac8ec20 100644
--- a/lib/VMCore/IntrinsicInst.cpp
+++ b/lib/VMCore/IntrinsicInst.cpp
@@ -54,7 +54,7 @@ Value *DbgInfoIntrinsic::StripCast(Value *C) {
 ///
 
 Value *DbgDeclareInst::getAddress() const {
-  if (MDNode* MD = cast_or_null<MDNode>(getOperand(1)))
+  if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
     return MD->getOperand(0);
   else
     return NULL;
@@ -65,9 +65,9 @@ Value *DbgDeclareInst::getAddress() const {
 ///
 
 const Value *DbgValueInst::getValue() const {
-  return cast<MDNode>(getOperand(1))->getOperand(0);
+  return cast<MDNode>(getArgOperand(0))->getOperand(0);
 }
 
 Value *DbgValueInst::getValue() {
-  return cast<MDNode>(getOperand(1))->getOperand(0);
+  return cast<MDNode>(getArgOperand(0))->getOperand(0);
 }
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index b894ea3..1d3a058 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -133,6 +133,7 @@ static const Function *getFunctionForValue(Value *V) {
 static const Function *assertLocalFunction(const MDNode *N) {
   if (!N->isFunctionLocal()) return 0;
 
+  // FIXME: This does not handle cyclic function local metadata.
   const Function *F = 0, *NewF = 0;
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     if (Value *V = N->getOperand(i)) {
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 94840f0..38a51df 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -17,6 +17,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/GVMaterializer.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/LeakDetector.h"
@@ -311,9 +312,11 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const {
 
 /// getNamedMetadata - Return the first NamedMDNode in the module with the
 /// specified name. This method returns null if a NamedMDNode with the
-//// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
-  return NamedMDSymTab->lookup(Name);
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
+  SmallString<256> NameData;
+  StringRef NameRef = Name.toStringRef(NameData);
+  return NamedMDSymTab->lookup(NameRef);
 }
 
 /// getOrInsertNamedMetadata - Return the first named MDNode in the module
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index a60877d..efd98af 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -35,6 +35,15 @@ using namespace llvm;
 // Pass Implementation
 //
 
+Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) {
+  assert(pid && "pid cannot be 0");
+}
+
+Pass::Pass(PassKind K, const void *pid)
+  : Resolver(0), PassID((intptr_t)pid), Kind(K) {
+  assert(pid && "pid cannot be 0");
+}
+
 // Force out-of-line virtual method.
 Pass::~Pass() {
   delete Resolver;
@@ -92,6 +101,23 @@ void Pass::verifyAnalysis() const {
   // By default, don't do anything.
 }
 
+void *Pass::getAdjustedAnalysisPointer(const PassInfo *) {
+  return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+  return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+  return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+  assert(!Resolver && "Resolver is already set");
+  Resolver = AR;
+}
+
 // print - Print out the internal state of the pass.  This is called by Analyze
 // to print out the contents of an analysis.  Otherwise it is not necessary to
 // implement this method.
@@ -364,6 +390,14 @@ void PassInfo::unregisterPass() {
   getPassRegistrar()->UnregisterPass(*this);
 }
 
+Pass *PassInfo::createPass() const {
+  assert((!isAnalysisGroup() || NormalCtor) &&
+         "No default implementation found for analysis group!");
+  assert(NormalCtor &&
+         "Cannot call createPass on PassInfo without default ctor!");
+  return NormalCtor();
+}
+
 //===----------------------------------------------------------------------===//
 //                  Analysis Group Implementation Code
 //===----------------------------------------------------------------------===//
@@ -467,4 +501,15 @@ void AnalysisUsage::setPreservesCFG() {
   GetCFGOnlyPasses(Preserved).enumeratePasses();
 }
 
+AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) {
+  assert(ID && "Pass class not registered!");
+  Required.push_back(ID);
+  return *this;
+}
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) {
+  assert(ID && "Pass class not registered!");
+  Required.push_back(ID);
+  RequiredTransitive.push_back(ID);
+  return *this;
+}
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index a56938c..296b0d1 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -1147,6 +1147,11 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
   llvm_unreachable("Unable to schedule pass");
 }
 
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+  assert(0 && "Unable to find on the fly pass");
+  return NULL;
+}
+
 // Destructor
 PMDataManager::~PMDataManager() {
   for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 645dd5a..585edf0 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -322,7 +322,13 @@ void Value::replaceAllUsesWith(Value *New) {
 Value *Value::stripPointerCasts() {
   if (!getType()->isPointerTy())
     return this;
+
+  // Even though we don't look through PHI nodes, we could be called on an
+  // instruction in an unreachable block, which may be on a cycle.
+  SmallPtrSet<Value *, 4> Visited;
+
   Value *V = this;
+  Visited.insert(V);
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
       if (!GEP->hasAllZeroIndices())
@@ -338,7 +344,9 @@ Value *Value::stripPointerCasts() {
       return V;
     }
     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
-  } while (1);
+  } while (Visited.insert(V));
+
+  return V;
 }
 
 Value *Value::getUnderlyingObject(unsigned MaxLookup) {
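The Visited set added above closes a real termination hole: stripPointerCasts can be called on an instruction in an unreachable block, where a cast or GEP chain may feed itself, and the old while (1) loop then never exited. The defensive pattern on its own (a generic sketch, not the patch itself):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Instruction.h"
    #include "llvm/Operator.h"
    using namespace llvm;

    // Walk through no-op bitcasts, stopping on revisit: SmallPtrSet::insert
    // returns false for an already-seen value, which breaks the cycles that
    // only unreachable code can form.
    static Value *followBitCasts(Value *V) {
      SmallPtrSet<Value*, 4> Visited;
      Visited.insert(V);
      while (Operator *Op = dyn_cast<Operator>(V)) {
        if (Op->getOpcode() != Instruction::BitCast)
          break;
        V = Op->getOperand(0);
        if (!Visited.insert(V))
          break;
      }
      return V;
    }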
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 75988cc..f97699d 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -85,7 +85,8 @@ namespace {  // Anonymous namespace for class
 
       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
         if (I->empty() || !I->back().isTerminator()) {
-          dbgs() << "Basic Block does not have terminator!\n";
+          dbgs() << "Basic Block in function '" << F.getName()
+                 << "' does not have terminator!\n";
           WriteAsOperand(dbgs(), I, true);
           dbgs() << "\n";
           Broken = true;
@@ -1356,7 +1357,7 @@ void Verifier::visitLoadInst(LoadInst &LI) {
 
 void Verifier::visitStoreInst(StoreInst &SI) {
   const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
-  Assert1(PTy, "Load operand must be a pointer.", &SI);
+  Assert1(PTy, "Store operand must be a pointer.", &SI);
   const Type *ElTy = PTy->getElementType();
   Assert2(ElTy == SI.getOperand(0)->getType(),
           "Stored value type does not match pointer operand type!",
@@ -1371,8 +1372,8 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
           &AI);
   Assert1(PTy->getElementType()->isSized(),
           "Cannot allocate unsized type",
           &AI);
-  Assert1(AI.getArraySize()->getType()->isIntegerTy(32),
-          "Alloca array size must be i32", &AI);
+  Assert1(AI.getArraySize()->getType()->isIntegerTy(),
+          "Alloca array size must have integer type", &AI);
   visitInstruction(AI);
 }
 
@@ -1453,7 +1454,7 @@ void Verifier::visitInstruction(Instruction &I) {
     if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
       // Check to make sure that the "address of" an intrinsic function is never
       // taken.
-      Assert1(!F->isIntrinsic() || (i == 0 && isa<CallInst>(I)),
+      Assert1(!F->isIntrinsic() || (i + 1 == e && isa<CallInst>(I)),
              "Cannot take the address of an intrinsic!", &I);
       Assert1(F->getParent() == Mod, "Referencing function in another module!",
              &I);
@@ -1536,7 +1537,8 @@ void Verifier::visitInstruction(Instruction &I) {
               "Instruction does not dominate all uses!", Op, &I);
     }
   } else if (isa<InlineAsm>(I.getOperand(i))) {
-    Assert1((i == 0 && isa<CallInst>(I)) || (i + 3 == e && isa<InvokeInst>(I)),
+    Assert1((i + 1 == e && isa<CallInst>(I)) ||
+            (i + 3 == e && isa<InvokeInst>(I)),
            "Cannot take the address of an inline asm!", &I);
   }
 }
@@ -1628,24 +1630,24 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
 
   // If the intrinsic takes MDNode arguments, verify that they are either global
  // or are local to *this* function.
-  for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
-    if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i)))
+  for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
+    if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
       visitMDNode(*MD, CI.getParent()->getParent());
 
   switch (ID) {
   default:
     break;
   case Intrinsic::dbg_declare: {  // llvm.dbg.declare
-    Assert1(CI.getOperand(1) && isa<MDNode>(CI.getOperand(1)),
+    Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
                 "invalid llvm.dbg.declare intrinsic call 1", &CI);
-    MDNode *MD = cast<MDNode>(CI.getOperand(1));
+    MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
     Assert1(MD->getNumOperands() == 1,
                 "invalid llvm.dbg.declare intrinsic call 2", &CI);
   } break;
   case Intrinsic::memcpy:
   case Intrinsic::memmove:
   case Intrinsic::memset:
-    Assert1(isa<ConstantInt>(CI.getOperand(4)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
             "alignment argument of memory intrinsics must be a constant int",
             &CI);
     break;
@@ -1654,10 +1656,10 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
   case Intrinsic::gcread:
     if (ID == Intrinsic::gcroot) {
       AllocaInst *AI =
-        dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
+        dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
       Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
               "llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
-      Assert1(isa<Constant>(CI.getOperand(2)),
+      Assert1(isa<Constant>(CI.getArgOperand(1)),
               "llvm.gcroot parameter #2 must be a constant.", &CI);
     }
 
@@ -1665,32 +1667,32 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
     Assert1(CI.getParent()->getParent()->hasGC(),
             "Enclosing function does not use GC.", &CI);
     break;
   case Intrinsic::init_trampoline:
-    Assert1(isa<Function>(CI.getOperand(2)->stripPointerCasts()),
+    Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
             "llvm.init_trampoline parameter #2 must resolve to a function.",
             &CI);
     break;
   case Intrinsic::prefetch:
-    Assert1(isa<ConstantInt>(CI.getOperand(2)) &&
-            isa<ConstantInt>(CI.getOperand(3)) &&
-            cast<ConstantInt>(CI.getOperand(2))->getZExtValue() < 2 &&
-            cast<ConstantInt>(CI.getOperand(3))->getZExtValue() < 4,
+    Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+            isa<ConstantInt>(CI.getArgOperand(2)) &&
+            cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+            cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
             "invalid arguments to llvm.prefetch",
            &CI);
     break;
   case Intrinsic::stackprotector:
-    Assert1(isa<AllocaInst>(CI.getOperand(2)->stripPointerCasts()),
+    Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
            "llvm.stackprotector parameter #2 must resolve to an alloca.",
            &CI);
     break;
   case Intrinsic::lifetime_start:
   case Intrinsic::lifetime_end:
   case Intrinsic::invariant_start:
-    Assert1(isa<ConstantInt>(CI.getOperand(1)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
            "size argument of memory use markers must be a constant integer",
            &CI);
     break;
   case Intrinsic::invariant_end:
-    Assert1(isa<ConstantInt>(CI.getOperand(2)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
            "llvm.invariant.end parameter #2 must be a constant integer", &CI);
     break;
   }
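One consequence of the relaxed verifier rule above ("Alloca array size must have integer type"): a dynamically sized alloca may now carry, say, an i64 element count, which the matching getAISize change in Instructions.cpp also accepts. A hedged sketch (the helper name is ours):

    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Builds 'alloca i32, i64 %n'. Under the old rule the verifier insisted
    // the array size be exactly i32; any integer width now passes.
    static AllocaInst *createVLA(LLVMContext &C, Value *N64,
                                 Instruction *InsertBefore) {
      return new AllocaInst(Type::getInt32Ty(C), N64, "vla", InsertBefore);
    }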