Diffstat (limited to 'lib')
274 files changed, 19422 insertions, 18656 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index dee9b53..371dcaf 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -116,13 +116,16 @@ AliasAnalysis::getModRefBehavior(Function *F,
     return DoesNotAccessMemory;
   if (F->onlyReadsMemory())
     return OnlyReadsMemory;
-  if (unsigned id = F->getIntrinsicID()) {
+  if (unsigned id = F->getIntrinsicID())
+    return getModRefBehavior(id);
+  }
+  return UnknownModRefBehavior;
+}
+
+AliasAnalysis::ModRefBehavior AliasAnalysis::getModRefBehavior(unsigned iid) {
 #define GET_INTRINSIC_MODREF_BEHAVIOR
 #include "llvm/Intrinsics.gen"
 #undef GET_INTRINSIC_MODREF_BEHAVIOR
-    }
-  }
-  return UnknownModRefBehavior;
 }
 
 AliasAnalysis::ModRefResult
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index f8cb323..398dec7 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -13,11 +13,11 @@
 
 using namespace llvm;
 
-int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
-                     char **OutMessages) {
+LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
+                          char **OutMessages) {
   std::string Messages;
 
-  int Result = verifyModule(*unwrap(M),
+  LLVMBool Result = verifyModule(*unwrap(M),
                             static_cast<VerifierFailureAction>(Action),
                             OutMessages? &Messages : 0);
 
@@ -27,7 +27,7 @@ int LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
   return Result;
}
 
-int LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
+LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
   return verifyFunction(*unwrap<Function>(Fn),
                         static_cast<VerifierFailureAction>(Action));
 }
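The Analysis.cpp hunk above is part of this commit's switch of the C bindings from raw int to the LLVMBool typedef. Below is a minimal sketch of how a C-API client consumes the new signature, assuming this revision's llvm-c headers; the module name and error handling are illustrative, not part of the patch:

// Minimal sketch, assuming this revision's llvm-c headers.
// LLVMBool is still an int underneath: nonzero means the module is broken.
#include <llvm-c/Core.h>
#include <llvm-c/Analysis.h>
#include <stdio.h>

int main(void) {
  LLVMModuleRef M = LLVMModuleCreateWithName("demo"); /* empty module, trivially valid */
  char *Err = NULL;

  LLVMBool Broken = LLVMVerifyModule(M, LLVMReturnStatusAction, &Err);
  if (Broken)
    fprintf(stderr, "verifier: %s\n", Err);
  LLVMDisposeMessage(Err);

  LLVMDisposeModule(M);
  return Broken;
}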
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index eaf90d0..4ae8859 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -398,8 +398,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
                           BytesLoaded, TD))
     return 0;
 
-  APInt ResultVal(IntType->getBitWidth(), 0);
-  for (unsigned i = 0; i != BytesLoaded; ++i) {
+  APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]);
+  for (unsigned i = 1; i != BytesLoaded; ++i) {
     ResultVal <<= 8;
     ResultVal |= APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1-i]);
   }
@@ -718,14 +718,13 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
   switch (Opcode) {
   default: return 0;
+  case Instruction::ICmp:
+  case Instruction::FCmp: assert(0 && "Invalid for compares");
   case Instruction::Call:
     if (Function *F = dyn_cast<Function>(Ops[0]))
       if (canConstantFoldCallTo(F))
         return ConstantFoldCall(F, Ops+1, NumOps-1);
     return 0;
-  case Instruction::ICmp:
-  case Instruction::FCmp:
-    llvm_unreachable("This function is invalid for compares: no predicate specified");
   case Instruction::PtrToInt:
     // If the input is a inttoptr, eliminate the pair.  This requires knowing
     // the width of a pointer, so it can't be done in ConstantExpr::getCast.
@@ -877,6 +876,20 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                        CE1->getOperand(0), TD);
       }
     }
+
+    // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
+    // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
+    if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
+        CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
+      Constant *LHS =
+        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+      Constant *RHS =
+        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+      unsigned OpC =
+        Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+      Constant *Ops[] = { LHS, RHS };
+      return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD);
+    }
   }
 
   return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 7d72b38..3532b05 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -37,8 +37,6 @@ PrintDirectory("print-fullpath",
 namespace {
   class PrintDbgInfo : public FunctionPass {
     raw_ostream &Out;
-    void printStopPoint(const DbgStopPointInst *DSI);
-    void printFuncStart(const DbgFuncStartInst *FS);
     void printVariableDeclaration(const Value *V);
   public:
     static char ID; // Pass identification
@@ -74,27 +72,6 @@ void PrintDbgInfo::printVariableDeclaration(const Value *V) {
   Out << File << ":" << LineNo << "\n";
 }
 
-void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) {
-  if (PrintDirectory)
-    if (MDString *Str = dyn_cast<MDString>(DSI->getDirectory()))
-      Out << Str->getString() << '/';
-
-  if (MDString *Str = dyn_cast<MDString>(DSI->getFileName()))
-    Out << Str->getString();
-  Out << ':' << DSI->getLine();
-
-  if (unsigned Col = DSI->getColumn())
-    Out << ':' << Col;
-}
-
-void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
-  DISubprogram Subprogram(FS->getSubprogram());
-  Out << "; fully qualified function name: " << Subprogram.getDisplayName()
-      << " return type: " << Subprogram.getReturnTypeName()
-      << " at line " << Subprogram.getLineNumber()
-      << "\n\n";
-}
-
 bool PrintDbgInfo::runOnFunction(Function &F) {
   if (F.isDeclaration())
     return false;
@@ -108,57 +85,21 @@ bool PrintDbgInfo::runOnFunction(Function &F) {
       // Skip dead blocks.
       continue;
 
-    const DbgStopPointInst *DSI = findBBStopPoint(BB);
     Out << BB->getName();
     Out << ":";
 
-    if (DSI) {
-      Out << "; (";
-      printStopPoint(DSI);
-      Out << ")";
-    }
-
     Out << "\n";
 
-    // A dbgstoppoint's information is valid until we encounter a new one.
-    const DbgStopPointInst *LastDSP = DSI;
-    bool Printed = DSI != 0;
     for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
          i != e; ++i) {
-      if (isa<DbgInfoIntrinsic>(i)) {
-        if ((DSI = dyn_cast<DbgStopPointInst>(i))) {
-          if (DSI->getContext() == LastDSP->getContext() &&
-              DSI->getLineValue() == LastDSP->getLineValue() &&
-              DSI->getColumnValue() == LastDSP->getColumnValue())
-            // Don't print same location twice.
-            continue;
-
-          LastDSP = cast<DbgStopPointInst>(i);
-
-          // Don't print consecutive stoppoints, use a flag to know which one we
-          // printed.
-          Printed = false;
-        } else if (const DbgFuncStartInst *FS = dyn_cast<DbgFuncStartInst>(i)) {
-          printFuncStart(FS);
-        }
-      } else {
-        if (!Printed && LastDSP) {
-          Out << "; ";
-          printStopPoint(LastDSP);
-          Out << "\n";
-          Printed = true;
-        }
-
       Out << *i << '\n';
       printVariableDeclaration(i);
 
       if (const User *U = dyn_cast<User>(i)) {
         for(unsigned i=0;i<U->getNumOperands();i++)
           printVariableDeclaration(U->getOperand(i));
       }
-      }
     }
   }
-
   return false;
 }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index de2d839..59ba807 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -599,9 +599,7 @@ void DIVariable::dump() const {
 //===----------------------------------------------------------------------===//
 
 DIFactory::DIFactory(Module &m)
-  : M(m), VMContext(M.getContext()), DeclareFn(0) {
-  EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext));
-}
+  : M(m), VMContext(M.getContext()), DeclareFn(0) {}
 
 Constant *DIFactory::GetTagConstant(unsigned TAG) {
   assert((TAG & LLVMDebugVersionMask) == 0 &&
@@ -1033,58 +1031,52 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
 
 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                      Instruction *InsertBefore) {
-  // Cast the storage to a {}* for the call to llvm.dbg.declare.
-  Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore);
-
+                                      Instruction *InsertBefore) {
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
-  Value *Args[] = { Storage, D.getNode() };
+  Value *Elts[] = { Storage };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Elts, 1), D.getNode() };
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
 }
 
 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                      BasicBlock *InsertAtEnd) {
-  // Cast the storage to a {}* for the call to llvm.dbg.declare.
-  Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd);
-
+                                      BasicBlock *InsertAtEnd) {
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
-  Value *Args[] = { Storage, D.getNode() };
+  Value *Elts[] = { Storage };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Elts, 1), D.getNode() };
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
 }
 
 /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 Instruction *InsertBefore) {
   assert(V && "no value passed to dbg.value");
-  assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
-         "offset must be i64");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
   Value *Elts[] = { V };
-  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     D.getNode() };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
 }
 
 /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 BasicBlock *InsertAtEnd) {
   assert(V && "no value passed to dbg.value");
-  assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
-         "offset must be i64");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
   Value *Elts[] = { V };
-  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     D.getNode() };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
 }
@@ -1242,52 +1234,6 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
   return true;
 }
 
-/// findStopPoint - Find the stoppoint coressponding to this instruction, that
-/// is the stoppoint that dominates this instruction.
-const DbgStopPointInst *llvm::findStopPoint(const Instruction *Inst) {
-  if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(Inst))
-    return DSI;
-
-  const BasicBlock *BB = Inst->getParent();
-  BasicBlock::const_iterator I = Inst, B;
-  while (BB) {
-    B = BB->begin();
-
-    // A BB consisting only of a terminator can't have a stoppoint.
-    while (I != B) {
-      --I;
-      if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
-        return DSI;
-    }
-
-    // This BB didn't have a stoppoint: if there is only one predecessor, look
-    // for a stoppoint there. We could use getIDom(), but that would require
-    // dominator info.
-    BB = I->getParent()->getUniquePredecessor();
-    if (BB)
-      I = BB->getTerminator();
-  }
-
-  return 0;
-}
-
-/// findBBStopPoint - Find the stoppoint corresponding to first real
-/// (non-debug intrinsic) instruction in this Basic Block, and return the
-/// stoppoint for it.
-const DbgStopPointInst *llvm::findBBStopPoint(const BasicBlock *BB) {
-  for(BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-    if (const DbgStopPointInst *DSI = dyn_cast<DbgStopPointInst>(I))
-      return DSI;
-
-  // Fallback to looking for stoppoint of unique predecessor. Useful if this
-  // BB contains no stoppoints, but unique predecessor does.
-  BB = BB->getUniquePredecessor();
-  if (BB)
-    return findStopPoint(BB->getTerminator());
-
-  return 0;
-}
-
 Value *llvm::findDbgGlobalDeclare(GlobalVariable *V) {
   const Module *M = V->getParent();
   NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
@@ -1306,25 +1252,24 @@ Value *llvm::findDbgGlobalDeclare(GlobalVariable *V) {
 
 /// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
 /// It looks through pointer casts too.
-const DbgDeclareInst *llvm::findDbgDeclare(const Value *V, bool stripCasts) {
-  if (stripCasts) {
-    V = V->stripPointerCasts();
-
-    // Look for the bitcast.
-    for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
-         I != E; ++I)
-      if (isa<BitCastInst>(I)) {
-        const DbgDeclareInst *DDI = findDbgDeclare(*I, false);
-        if (DDI) return DDI;
-      }
+const DbgDeclareInst *llvm::findDbgDeclare(const Value *V) {
+  V = V->stripPointerCasts();
+
+  if (!isa<Instruction>(V) && !isa<Argument>(V))
     return 0;
-  }
-
-  // Find llvm.dbg.declare among uses of the instruction.
-  for (Value::use_const_iterator I = V->use_begin(), E =V->use_end();
-       I != E; ++I)
-    if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I))
-      return DDI;
+
+  const Function *F = NULL;
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    F = I->getParent()->getParent();
+  else if (const Argument *A = dyn_cast<Argument>(V))
+    F = A->getParent();
+
+  for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+    for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+         BI != BE; ++BI)
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+        if (DDI->getAddress() == V)
+          return DDI;
   return 0;
 }
@@ -1372,29 +1317,6 @@ bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,
 }
 
 /// ExtractDebugLocation - Extract debug location information
-/// from llvm.dbg.stoppoint intrinsic.
-DebugLoc llvm::ExtractDebugLocation(DbgStopPointInst &SPI,
-                                    DebugLocTracker &DebugLocInfo) {
-  DebugLoc DL;
-  Value *Context = SPI.getContext();
-
-  // If this location is already tracked then use it.
-  DebugLocTuple Tuple(cast<MDNode>(Context), NULL, SPI.getLine(),
-                      SPI.getColumn());
-  DenseMap<DebugLocTuple, unsigned>::iterator II
-    = DebugLocInfo.DebugIdMap.find(Tuple);
-  if (II != DebugLocInfo.DebugIdMap.end())
-    return DebugLoc::get(II->second);
-
-  // Add a new location entry.
-  unsigned Id = DebugLocInfo.DebugLocations.size();
-  DebugLocInfo.DebugLocations.push_back(Tuple);
-  DebugLocInfo.DebugIdMap[Tuple] = Id;
-
-  return DebugLoc::get(Id);
-}
-
-/// ExtractDebugLocation - Extract debug location information
 /// from DILocation.
 DebugLoc llvm::ExtractDebugLocation(DILocation &Loc,
                                     DebugLocTracker &DebugLocInfo) {
@@ -1419,32 +1341,6 @@ DebugLoc llvm::ExtractDebugLocation(DILocation &Loc,
   return DebugLoc::get(Id);
 }
 
-/// ExtractDebugLocation - Extract debug location information
-/// from llvm.dbg.func_start intrinsic.
-DebugLoc llvm::ExtractDebugLocation(DbgFuncStartInst &FSI,
-                                    DebugLocTracker &DebugLocInfo) {
-  DebugLoc DL;
-  Value *SP = FSI.getSubprogram();
-
-  DISubprogram Subprogram(cast<MDNode>(SP));
-  unsigned Line = Subprogram.getLineNumber();
-  DICompileUnit CU(Subprogram.getCompileUnit());
-
-  // If this location is already tracked then use it.
-  DebugLocTuple Tuple(CU.getNode(), NULL, Line, /* Column */ 0);
-  DenseMap<DebugLocTuple, unsigned>::iterator II
-    = DebugLocInfo.DebugIdMap.find(Tuple);
-  if (II != DebugLocInfo.DebugIdMap.end())
-    return DebugLoc::get(II->second);
-
-  // Add a new location entry.
-  unsigned Id = DebugLocInfo.DebugLocations.size();
-  DebugLocInfo.DebugLocations.push_back(Tuple);
-  DebugLocInfo.DebugIdMap[Tuple] = Id;
-
-  return DebugLoc::get(Id);
-}
-
 /// getDISubprogram - Find subprogram that is enclosing this scope.
 DISubprogram llvm::getDISubprogram(MDNode *Scope) {
   DIDescriptor D(Scope);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index df9e31c..26c0c9e 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -128,8 +128,9 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
     if (!AddRecStride->properlyDominates(Header, DT))
       return false;
 
-    DEBUG(dbgs() << "[" << L->getHeader()->getName()
-                 << "] Variable stride: " << *AddRec << "\n");
+    DEBUG(dbgs() << "[";
+          WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+          dbgs() << "] Variable stride: " << *AddRec << "\n");
   }
 
   Stride = AddRecStride;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index bd9377b..651c918 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -102,6 +102,37 @@ unsigned InlineCostAnalyzer::FunctionInfo::
   return Reduction;
 }
 
+// callIsSmall - If a call is likely to lower to a single target instruction, or
+// is otherwise deemed small return true.
+// TODO: Perhaps calls like memcpy, strcpy, etc?
+static bool callIsSmall(const Function *F) {
+  if (!F) return false;
+
+  if (F->hasLocalLinkage()) return false;
+
+  if (!F->hasName()) return false;
+
+  StringRef Name = F->getName();
+
+  // These will all likely lower to a single selection DAG node.
+  if (Name == "copysign" || Name == "copysignf" ||
+      Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+      Name == "sin" || Name == "sinf" || Name == "sinl" ||
+      Name == "cos" || Name == "cosf" || Name == "cosl" ||
+      Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+    return true;
+
+  // These are all likely to be optimized into something smaller.
+  if (Name == "pow" || Name == "powf" || Name == "powl" ||
+      Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+      Name == "floor" || Name == "floorf" || Name == "ceil" ||
+      Name == "round" || Name == "ffs" || Name == "ffsl" ||
+      Name == "abs" || Name == "labs" || Name == "llabs")
+    return true;
+
+  return false;
+}
+
 /// analyzeBasicBlock - Fill in the current structure with information gleaned
 /// from the specified block.
 void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
@@ -129,7 +160,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
 
       // Calls often compile into many machine instructions.  Bump up their
      // cost to reflect this.
-      if (!isa<IntrinsicInst>(II))
+      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction()))
         NumInsts += InlineConstants::CallPenalty;
     }
 
@@ -141,11 +172,16 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
     if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
       ++NumVectorInsts;
 
-    // Noop casts, including ptr <-> int,  don't count.
     if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+      // Noop casts, including ptr <-> int,  don't count.
       if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
           isa<PtrToIntInst>(CI))
         continue;
+      // Result of a cmp instruction is often extended (to be used by other
+      // cmp instructions, logical or return instructions). These are usually
+      // nop on most sane targets.
+      if (isa<CmpInst>(CI->getOperand(0)))
+        continue;
     } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
       // If a GEP has all constant indices, it will probably be folded with
      // a load/store.
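The IVUsers hunk above, and the LoopInfo/ScalarEvolution hunks that follow, all replace getHeader()->getName() in debug output with WriteAsOperand: getName() returns an empty string for unnamed basic blocks, while WriteAsOperand falls back to the block's slot number. A sketch of the pattern against this era's llvm/Assembly/Writer.h; printLoopHeader is an illustrative helper, not part of the patch:

// Sketch, assuming the 2010-era LLVM headers this patch targets.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Prints "%name" for a named header block and a "%<slot>" style operand
// for an unnamed one, where getName() would have printed nothing.
static void printLoopHeader(raw_ostream &OS, const Loop *L) {
  OS << "Loop ";
  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
  OS << ": depth " << L->getLoopDepth() << "\n";
}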
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 5d31c11..453af5a 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <algorithm>
@@ -385,6 +386,10 @@ BasicBlock *Loop::getUniqueExitBlock() const {
   return 0;
 }
 
+void Loop::dump() const {
+  print(dbgs());
+}
+
 //===----------------------------------------------------------------------===//
 // LoopInfo implementation
 //
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 17dc686..4d85ce4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -316,7 +316,9 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << "{" << *Operands[0];
   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
     OS << ",+," << *Operands[i];
-  OS << "}<" << L->getHeader()->getName() + ">";
+  OS << "}<";
+  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+  OS << ">";
 }
 
 void SCEVFieldOffsetExpr::print(raw_ostream &OS) const {
@@ -5193,7 +5195,9 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
     PrintLoopInfo(OS, SE, *I);
 
-  OS << "Loop " << L->getHeader()->getName() << ": ";
+  OS << "Loop ";
+  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+  OS << ": ";
 
   SmallVector<BasicBlock *, 8> ExitBlocks;
   L->getExitBlocks(ExitBlocks);
@@ -5206,8 +5210,10 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
     OS << "Unpredictable backedge-taken count. ";
   }
 
-  OS << "\n";
-  OS << "Loop " << L->getHeader()->getName() << ": ";
+  OS << "\n"
+        "Loop ";
+  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+  OS << ": ";
 
   if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
     OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
@@ -5227,7 +5233,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
   // const isn't dangerous.
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
-  OS << "Classifying expressions for: " << F->getName() << "\n";
+  OS << "Classifying expressions for: ";
+  WriteAsOperand(OS, F, /*PrintType=*/false);
+  OS << "\n";
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
     if (isSCEVable(I->getType())) {
       OS << *I << '\n';
@@ -5256,7 +5264,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
       OS << "\n";
     }
 
-  OS << "Determining loop execution counts for: " << F->getName() << "\n";
+  OS << "Determining loop execution counts for: ";
+  WriteAsOperand(OS, F, /*PrintType=*/false);
+  OS << "\n";
   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
     PrintLoopInfo(OS, &SE, *I);
 }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index acd3119..91e5bc3 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -726,8 +726,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
     Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
     if (Tmp2 == 1) return 1;
-    return std::min(Tmp, Tmp2)-1;
-    break;
+    return std::min(Tmp, Tmp2)-1;
 
   case Instruction::Sub:
     Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
@@ -757,8 +756,24 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
     // is, at worst, one more bit than the inputs.
     Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
     if (Tmp == 1) return 1;  // Early out.
-    return std::min(Tmp, Tmp2)-1;
-    break;
+    return std::min(Tmp, Tmp2)-1;
+
+  case Instruction::PHI: {
+    PHINode *PN = cast<PHINode>(U);
+    // Don't analyze large in-degree PHIs.
+    if (PN->getNumIncomingValues() > 4) break;
+
+    // Take the minimum of all incoming values.  This can't infinitely loop
+    // because of our depth threshold.
+    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+      if (Tmp == 1) return Tmp;
+      Tmp = std::min(Tmp,
+                     ComputeNumSignBits(PN->getIncomingValue(1), TD, Depth+1));
+    }
+    return Tmp;
+  }
+
   case Instruction::Trunc:
     // FIXME: it's tricky to do anything useful for this, but it is an important
     // case for targets like X86.
@@ -1348,7 +1363,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
     // Make sure the index-ee is a pointer to array of i8.
     const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
     const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
-    if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext()))
+    if (AT == 0 || !AT->getElementType()->isInteger(8))
       return false;
 
     // Check to make sure that the first operand of the GEP is an integer and
@@ -1387,8 +1402,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
 
   // Must be a Constant Array
   ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (Array == 0 ||
-      Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
+  if (Array == 0 || !Array->getType()->getElementType()->isInteger(8))
     return false;
 
   // Get the number of elements in the array
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 15a9832..e4039ab 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -510,12 +510,17 @@ bool LLParser::ParseNamedMetadata() {
       ParseToken(lltok::lbrace, "Expected '{' here"))
     return true;
 
-  SmallVector<MetadataBase *, 8> Elts;
+  SmallVector<MDNode *, 8> Elts;
   do {
+    // Null is a special case since it is typeless.
+    if (EatIfPresent(lltok::kw_null)) {
+      Elts.push_back(0);
+      continue;
+    }
+
     if (ParseToken(lltok::exclaim, "Expected '!' here"))
       return true;
 
-    // FIXME: This rejects MDStrings.  Are they legal in an named MDNode or not?
     MDNode *N = 0;
     if (ParseMDNodeID(N)) return true;
     Elts.push_back(N);
@@ -543,7 +548,7 @@ bool LLParser::ParseStandaloneMetadata() {
       ParseType(Ty, TyLoc) ||
       ParseToken(lltok::exclaim, "Expected '!' here") ||
       ParseToken(lltok::lbrace, "Expected '{' here") ||
-      ParseMDNodeVector(Elts) ||
+      ParseMDNodeVector(Elts, NULL) ||
       ParseToken(lltok::rbrace, "expected end of metadata node"))
     return true;
 
@@ -1715,8 +1720,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
   }
 
   // Don't make placeholders with invalid type.
-  if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) &&
-      Ty != Type::getLabelTy(F.getContext())) {
+  if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
     P.Error(Loc, "invalid use of a non-first-class type");
     return 0;
   }
@@ -1757,8 +1761,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
     return 0;
   }
 
-  if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) &&
-      Ty != Type::getLabelTy(F.getContext())) {
+  if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
     P.Error(Loc, "invalid use of a non-first-class type");
     return 0;
   }
@@ -1881,8 +1884,10 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
 /// ParseValID - Parse an abstract value that doesn't necessarily have a
 /// type implied.  For example, if we parse "4" we don't know what integer type
 /// it has.  The value will later be combined with its type and checked for
-/// sanity.
-bool LLParser::ParseValID(ValID &ID) {
+/// sanity.  PFS is used to convert function-local operands of metadata (since
+/// metadata operands are not just parsed here but also converted to values).
+/// PFS can be null when we are not parsing metadata values inside a function.
+bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
   ID.Loc = Lex.getLoc();
   switch (Lex.getKind()) {
   default: return TokError("expected value token");
@@ -1908,7 +1913,7 @@ bool LLParser::ParseValID(ValID &ID) {
 
     if (EatIfPresent(lltok::lbrace)) {
       SmallVector<Value*, 16> Elts;
-      if (ParseMDNodeVector(Elts) ||
+      if (ParseMDNodeVector(Elts, PFS) ||
          ParseToken(lltok::rbrace, "expected end of metadata node"))
        return true;
 
@@ -2353,30 +2358,85 @@ bool LLParser::ParseValID(ValID &ID) {
 }
 
 /// ParseGlobalValue - Parse a global value with the specified type.
-bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&V) {
-  V = 0;
+bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
+  C = 0;
   ValID ID;
-  return ParseValID(ID) ||
-         ConvertGlobalValIDToValue(Ty, ID, V);
+  Value *V = NULL;
+  bool Parsed = ParseValID(ID) ||
+                ConvertValIDToValue(Ty, ID, V, NULL);
+  if (V && !(C = dyn_cast<Constant>(V)))
+    return Error(ID.Loc, "global values must be constants");
+  return Parsed;
+}
+
+bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+  PATypeHolder Type(Type::getVoidTy(Context));
+  return ParseType(Type) ||
+         ParseGlobalValue(Type, V);
+}
+
+/// ParseGlobalValueVector
+///   ::= /*empty*/
+///   ::= TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+  // Empty list.
+  if (Lex.getKind() == lltok::rbrace ||
+      Lex.getKind() == lltok::rsquare ||
+      Lex.getKind() == lltok::greater ||
+      Lex.getKind() == lltok::rparen)
+    return false;
+
+  Constant *C;
+  if (ParseGlobalTypeAndValue(C)) return true;
+  Elts.push_back(C);
+
+  while (EatIfPresent(lltok::comma)) {
+    if (ParseGlobalTypeAndValue(C)) return true;
+    Elts.push_back(C);
+  }
+
+  return false;
 }
 
-/// ConvertGlobalValIDToValue - Apply a type to a ValID to get a fully resolved
-/// constant.
-bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
-                                         Constant *&V) {
+
+//===----------------------------------------------------------------------===//
+// Function Parsing.
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+                                   PerFunctionState *PFS) {
   if (isa<FunctionType>(Ty))
     return Error(ID.Loc, "functions are not values, refer to them as pointers");
 
   switch (ID.Kind) {
   default: llvm_unreachable("Unknown ValID!");
-  case ValID::t_MDNode:
-  case ValID::t_MDString:
-    return Error(ID.Loc, "invalid use of metadata");
   case ValID::t_LocalID:
+    if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+    V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
+    return (V == 0);
   case ValID::t_LocalName:
-    return Error(ID.Loc, "invalid use of function-local name");
-  case ValID::t_InlineAsm:
-    return Error(ID.Loc, "inline asm can only be an operand of call/invoke");
+    if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+    V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
+    return (V == 0);
+  case ValID::t_InlineAsm: {
+    const PointerType *PTy = dyn_cast<PointerType>(Ty);
+    const FunctionType *FTy =
+      PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+    if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+      return Error(ID.Loc, "invalid type for inline asm constraint string");
+    V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
+    return false;
+  }
+  case ValID::t_MDNode:
+    if (!Ty->isMetadataTy())
+      return Error(ID.Loc, "metadata value must have metadata type");
+    V = ID.MDNodeVal;
+    return false;
+  case ValID::t_MDString:
+    if (!Ty->isMetadataTy())
+      return Error(ID.Loc, "metadata value must have metadata type");
+    V = ID.MDStringVal;
+    return false;
   case ValID::t_GlobalName:
     V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
     return V == 0;
@@ -2440,90 +2500,11 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
   }
 }
 
-/// ConvertGlobalOrMetadataValIDToValue - Apply a type to a ValID to get a fully
-/// resolved constant or metadata value.
-bool LLParser::ConvertGlobalOrMetadataValIDToValue(const Type *Ty, ValID &ID,
-                                                   Value *&V) {
-  switch (ID.Kind) {
-  case ValID::t_MDNode:
-    if (!Ty->isMetadataTy())
-      return Error(ID.Loc, "metadata value must have metadata type");
-    V = ID.MDNodeVal;
-    return false;
-  case ValID::t_MDString:
-    if (!Ty->isMetadataTy())
-      return Error(ID.Loc, "metadata value must have metadata type");
-    V = ID.MDStringVal;
-    return false;
-  default:
-    Constant *C;
-    if (ConvertGlobalValIDToValue(Ty, ID, C)) return true;
-    V = C;
-    return false;
-  }
-}
-
-
-bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
-  PATypeHolder Type(Type::getVoidTy(Context));
-  return ParseType(Type) ||
-         ParseGlobalValue(Type, V);
-}
-
-/// ParseGlobalValueVector
-///   ::= /*empty*/
-///   ::= TypeAndValue (',' TypeAndValue)*
-bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
-  // Empty list.
-  if (Lex.getKind() == lltok::rbrace ||
-      Lex.getKind() == lltok::rsquare ||
-      Lex.getKind() == lltok::greater ||
-      Lex.getKind() == lltok::rparen)
-    return false;
-
-  Constant *C;
-  if (ParseGlobalTypeAndValue(C)) return true;
-  Elts.push_back(C);
-
-  while (EatIfPresent(lltok::comma)) {
-    if (ParseGlobalTypeAndValue(C)) return true;
-    Elts.push_back(C);
-  }
-
-  return false;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Function Parsing.
-//===----------------------------------------------------------------------===//
-
-bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
-                                   PerFunctionState &PFS) {
-  switch (ID.Kind) {
-  case ValID::t_LocalID: V = PFS.GetVal(ID.UIntVal, Ty, ID.Loc); break;
-  case ValID::t_LocalName: V = PFS.GetVal(ID.StrVal, Ty, ID.Loc); break;
-  case ValID::t_InlineAsm: {
-    const PointerType *PTy = dyn_cast<PointerType>(Ty);
-    const FunctionType *FTy =
-      PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
-    if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
-      return Error(ID.Loc, "invalid type for inline asm constraint string");
-    V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
-    return false;
-  }
-  default:
-    return ConvertGlobalOrMetadataValIDToValue(Ty, ID, V);
-  }
-
-  return V == 0;
-}
-
 bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
   V = 0;
   ValID ID;
-  return ParseValID(ID) ||
-         ConvertValIDToValue(Ty, ID, V, PFS);
+  return ParseValID(ID, &PFS) ||
+         ConvertValIDToValue(Ty, ID, V, &PFS);
 }
 
 bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
@@ -2663,8 +2644,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
   AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
 
-  if (PAL.paramHasAttr(1, Attribute::StructRet) &&
-      RetType != Type::getVoidTy(Context))
+  if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy())
     return Error(RetTypeLoc, "functions with 'sret' argument must return void");
 
   const FunctionType *FT =
@@ -2766,6 +2746,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
 
   PerFunctionState PFS(*this, Fn, FunctionNumber);
 
+  // We need at least one basic block.
+  if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end)
+    return TokError("function body requires at least one basic block");
+
   while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
     if (ParseBasicBlock(PFS)) return true;
 
@@ -3232,7 +3216,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
 
   // Look up the callee.
   Value *Callee;
-  if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
+  if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
 
   // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
   // function attributes.
@@ -3578,7 +3562,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
 
   // Look up the callee.
   Value *Callee;
-  if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
+  if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
 
   // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
   // function attributes.
@@ -3660,7 +3644,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
     }
   }
 
-  if (Size && Size->getType() != Type::getInt32Ty(Context))
+  if (Size && !Size->getType()->isInteger(32))
     return Error(SizeLoc, "element count must be i32");
 
   if (isAlloca) {
@@ -3840,7 +3824,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
 ///   ::= Element (',' Element)*
 ///  Element
 ///   ::= 'null' | TypeAndValue
-bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
+bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
+                                 PerFunctionState *PFS) {
   do {
     // Null is a special case since it is typeless.
     if (EatIfPresent(lltok::kw_null)) {
@@ -3851,8 +3836,8 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
     Value *V = 0;
     PATypeHolder Ty(Type::getVoidTy(Context));
     ValID ID;
-    if (ParseType(Ty) || ParseValID(ID) ||
-        ConvertGlobalOrMetadataValIDToValue(Ty, ID, V))
+    if (ParseType(Ty) || ParseValID(ID, PFS) ||
+        ConvertValIDToValue(Ty, ID, V, PFS))
       return true;
 
     Elts.push_back(V);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 803832f..bea0593 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -216,17 +216,6 @@ namespace llvm {
     bool ParseFunctionType(PATypeHolder &Result);
     PATypeHolder HandleUpRefs(const Type *Ty);
 
-    // Constants.
-    bool ParseValID(ValID &ID);
-    bool ConvertGlobalValIDToValue(const Type *Ty, ValID &ID, Constant *&V);
-    bool ConvertGlobalOrMetadataValIDToValue(const Type *Ty, ValID &ID,
-                                             Value *&V);
-    bool ParseGlobalValue(const Type *Ty, Constant *&V);
-    bool ParseGlobalTypeAndValue(Constant *&V);
-    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
-    bool ParseMDNodeVector(SmallVectorImpl<Value*> &);
-
-
     // Function Semantic Analysis.
     class PerFunctionState {
       LLParser &P;
@@ -270,7 +259,7 @@ namespace llvm {
     };
 
     bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
-                             PerFunctionState &PFS);
+                             PerFunctionState *PFS);
 
     bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
     bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
@@ -301,6 +290,13 @@ namespace llvm {
     bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
                             PerFunctionState &PFS);
 
+    // Constant Parsing.
+    bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
+    bool ParseGlobalValue(const Type *Ty, Constant *&V);
+    bool ParseGlobalTypeAndValue(Constant *&V);
+    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
+
     // Function Parsing.
     struct ArgInfo {
       LocTy Loc;
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index f513d41..32b97e8 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -18,9 +18,9 @@ using namespace llvm;
 
 /* Builds a module from the bitcode in the specified memory buffer, returning a
    reference to the module via the OutModule parameter. Returns 0 on success.
-   Optionally returns a human-readable error message via OutMessage. */
-int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
-                     LLVMModuleRef *OutModule, char **OutMessage) {
+   Optionally returns a human-readable error message via OutMessage. */
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
+                          LLVMModuleRef *OutModule, char **OutMessage) {
   std::string Message;
 
   *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(),
@@ -34,9 +34,10 @@ int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
   return 0;
 }
 
-int LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
-                              LLVMMemoryBufferRef MemBuf,
-                              LLVMModuleRef *OutModule, char **OutMessage) {
+LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
+                                   LLVMMemoryBufferRef MemBuf,
+                                   LLVMModuleRef *OutModule,
+                                   char **OutMessage) {
   std::string Message;
 
   *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
@@ -53,9 +54,9 @@ int LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
 
 /* Reads a module from the specified path, returning via the OutModule parameter
    a module provider which performs lazy deserialization. Returns 0 on success.
    Optionally returns a human-readable error message via OutMessage.
 */
-int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
-                                 LLVMModuleProviderRef *OutMP,
-                                 char **OutMessage) {
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+                                      LLVMModuleProviderRef *OutMP,
+                                      char **OutMessage) {
   std::string Message;
 
   *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), getGlobalContext(),
@@ -70,10 +71,10 @@ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
   return 0;
 }
 
-int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
-                                          LLVMMemoryBufferRef MemBuf,
-                                          LLVMModuleProviderRef *OutMP,
-                                          char **OutMessage) {
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+                                               LLVMMemoryBufferRef MemBuf,
+                                               LLVMModuleProviderRef *OutMP,
+                                               char **OutMessage) {
   std::string Message;
 
   *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef),
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 7dffafa..aabbc90 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -737,7 +737,7 @@ bool BitcodeReader::ParseValueSymbolTable() {
 }
 
 bool BitcodeReader::ParseMetadata() {
-  unsigned NextValueNo = MDValueList.size();
+  unsigned NextMDValueNo = MDValueList.size();
 
   if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
     return Error("Malformed block record");
@@ -766,6 +766,7 @@ bool BitcodeReader::ParseMetadata() {
       continue;
     }
 
+    bool IsFunctionLocal = false;
     // Read a record.
     Record.clear();
     switch (Stream.ReadRecord(Code, Record)) {
@@ -787,17 +788,25 @@ bool BitcodeReader::ParseMetadata() {
 
       // Read named metadata elements.
       unsigned Size = Record.size();
-      SmallVector<MetadataBase*, 8> Elts;
+      SmallVector<MDNode *, 8> Elts;
       for (unsigned i = 0; i != Size; ++i) {
-        Value *MD = MDValueList.getValueFwdRef(Record[i]);
-        if (MetadataBase *B = dyn_cast<MetadataBase>(MD))
-          Elts.push_back(B);
+        if (Record[i] == ~0U) {
+          Elts.push_back(NULL);
+          continue;
+        }
+        MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
+        if (MD == 0)
+          return Error("Malformed metadata record");
+        Elts.push_back(MD);
       }
       Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(),
                                      Elts.size(), TheModule);
-      MDValueList.AssignValue(V, NextValueNo++);
+      MDValueList.AssignValue(V, NextMDValueNo++);
       break;
     }
+    case bitc::METADATA_FN_NODE:
+      IsFunctionLocal = true;
+      // fall-through
     case bitc::METADATA_NODE: {
       if (Record.empty() || Record.size() % 2 == 1)
         return Error("Invalid METADATA_NODE record");
@@ -808,13 +817,15 @@ bool BitcodeReader::ParseMetadata() {
         const Type *Ty = getTypeByID(Record[i], false);
         if (Ty->isMetadataTy())
           Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
-        else if (Ty != Type::getVoidTy(Context))
+        else if (!Ty->isVoidTy())
          Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
        else
          Elts.push_back(NULL);
      }
-      Value *V = MDNode::get(Context, &Elts[0], Elts.size());
-      MDValueList.AssignValue(V, NextValueNo++);
+      Value *V = MDNode::getWhenValsUnresolved(Context, &Elts[0], Elts.size(),
+                                               IsFunctionLocal);
+      IsFunctionLocal = false;
+      MDValueList.AssignValue(V, NextMDValueNo++);
       break;
     }
     case bitc::METADATA_STRING: {
@@ -825,7 +836,7 @@ bool BitcodeReader::ParseMetadata() {
         String[i] = Record[i];
       Value *V = MDString::get(Context,
                                StringRef(String.data(), String.size()));
-      MDValueList.AssignValue(V, NextValueNo++);
+      MDValueList.AssignValue(V, NextMDValueNo++);
       break;
     }
     case bitc::METADATA_KIND: {
@@ -1646,6 +1657,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       case bitc::METADATA_ATTACHMENT_ID:
        if (ParseMetadataAttachment()) return true;
        break;
+      case bitc::METADATA_BLOCK_ID:
+        if (ParseMetadata()) return true;
+        break;
      }
      continue;
    }
@@ -2238,7 +2252,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
    }
 
    // Non-void values get registered in the value table for future use.
-    if (I && I->getType() != Type::getVoidTy(Context))
+    if (I && !I->getType()->isVoidTy())
      ValueList.AssignValue(I, NextValueNo++);
  }
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index c78a30e..5a4a1b2 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -484,7 +484,9 @@ static void WriteMDNode(const MDNode *N,
       Record.push_back(0);
     }
   }
-  Stream.EmitRecord(bitc::METADATA_NODE, Record, 0);
+  unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+                                           bitc::METADATA_NODE;
+  Stream.EmitRecord(MDCode, Record, 0);
   Record.clear();
 }
 
@@ -497,11 +499,13 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
 
   for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
     if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
-      if (!StartedMetadataBlock) {
-        Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
-        StartedMetadataBlock = true;
+      if (!N->isFunctionLocal()) {
+        if (!StartedMetadataBlock) {
+          Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+          StartedMetadataBlock = true;
+        }
+        WriteMDNode(N, VE, Stream, Record);
       }
-      WriteMDNode(N, VE, Stream, Record);
     } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
       if (!StartedMetadataBlock) {
         Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
@@ -528,10 +532,9 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
       }
 
       // Write name.
-      std::string Str = NMD->getNameStr();
-      const char *StrBegin = Str.c_str();
-      for (unsigned i = 0, e = Str.length(); i != e; ++i)
-        Record.push_back(StrBegin[i]);
+      StringRef Str = NMD->getName();
+      for (unsigned i = 0, e = Str.size(); i != e; ++i)
+        Record.push_back(Str[i]);
       Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
       Record.clear();
 
@@ -540,7 +543,7 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
         if (NMD->getOperand(i))
           Record.push_back(VE.getValueID(NMD->getOperand(i)));
         else
-          Record.push_back(0);
+          Record.push_back(~0U);
       }
       Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
       Record.clear();
@@ -551,6 +554,27 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
     Stream.ExitBlock();
 }
 
+static void WriteFunctionLocalMetadata(const Function &F,
+                                       const ValueEnumerator &VE,
+                                       BitstreamWriter &Stream) {
+  bool StartedMetadataBlock = false;
+  SmallVector<uint64_t, 64> Record;
+  const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+
+  for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+    if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first))
+      if (N->getFunction() == &F) {
+        if (!StartedMetadataBlock) {
+          Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+          StartedMetadataBlock = true;
+        }
+        WriteMDNode(N, VE, Stream, Record);
+      }
+
+  if (StartedMetadataBlock)
+    Stream.ExitBlock();
+}
+
 static void WriteMetadataAttachment(const Function &F,
                                     const ValueEnumerator &VE,
                                     BitstreamWriter &Stream) {
@@ -1194,6 +1218,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
   VE.getFunctionConstantRange(CstStart, CstEnd);
   WriteConstants(CstStart, CstEnd, VE, Stream, false);
 
+  // If there is function-local metadata, emit it now.
+  WriteFunctionLocalMetadata(F, VE, Stream);
+
   // Keep a running idea of what the instruction ID is.
   unsigned InstID = CstEnd;
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index d8128db..cb139e5 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -74,9 +74,10 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
   // Enumerate types used by the type symbol table.
   EnumerateTypeSymbolTable(M->getTypeSymbolTable());
 
-  // Insert constants that are named at module level into the slot pool so that
-  // the module symbol table can refer to them...
+  // Insert constants and metadata that are named at module level into the slot
+  // pool so that the module symbol table can refer to them...
   EnumerateValueSymbolTable(M->getValueSymbolTable());
+  EnumerateMDSymbolTable(M->getMDSymbolTable());
 
   SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
 
@@ -90,8 +91,13 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
     for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
         for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
-             OI != E; ++OI)
+             OI != E; ++OI) {
+          if (MDNode *MD = dyn_cast<MDNode>(*OI))
+            if (MD->isFunctionLocal())
+              // These will get enumerated during function-incorporation.
+              continue;
           EnumerateOperandType(*OI);
+        }
         EnumerateType(I->getType());
         if (const CallInst *CI = dyn_cast<CallInst>(I))
           EnumerateAttributes(CI->getAttributes());
@@ -196,6 +202,33 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
     EnumerateValue(VI->getValue());
 }
 
+/// EnumerateMDSymbolTable - Insert all of the values in the specified metadata
+/// table.
+void ValueEnumerator::EnumerateMDSymbolTable(const MDSymbolTable &MST) {
+  for (MDSymbolTable::const_iterator MI = MST.begin(), ME = MST.end();
+       MI != ME; ++MI)
+    EnumerateValue(MI->getValue());
+}
+
+void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
+  // Check to see if it's already in!
+  unsigned &MDValueID = MDValueMap[MD];
+  if (MDValueID) {
+    // Increment use count.
+    MDValues[MDValueID-1].second++;
+    return;
+  }
+
+  // Enumerate the type of this value.
+  EnumerateType(MD->getType());
+
+  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+    if (MDNode *E = MD->getOperand(i))
+      EnumerateValue(E);
+  MDValues.push_back(std::make_pair(MD, 1U));
+  MDValueMap[MD] = Values.size();
+}
+
 void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
   // Check to see if it's already in!
   unsigned &MDValueID = MDValueMap[MD];
@@ -212,7 +245,7 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
     MDValues.push_back(std::make_pair(MD, 1U));
     MDValueMap[MD] = MDValues.size();
     MDValueID = MDValues.size();
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
       if (Value *V = N->getOperand(i))
         EnumerateValue(V);
       else
@@ -221,14 +254,6 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) {
     return;
   }
 
-  if (const NamedMDNode *N = dyn_cast<NamedMDNode>(MD)) {
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      EnumerateValue(N->getOperand(i));
-    MDValues.push_back(std::make_pair(MD, 1U));
-    MDValueMap[MD] = Values.size();
-    return;
-  }
-
   // Add the value.
   assert(isa<MDString>(MD) && "Unknown metadata kind");
   MDValues.push_back(std::make_pair(MD, 1U));
@@ -239,6 +264,8 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
   assert(!V->getType()->isVoidTy() && "Can't insert void values!");
   if (const MetadataBase *MB = dyn_cast<MetadataBase>(V))
     return EnumerateMetadata(MB);
+  else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V))
+    return EnumerateNamedMDNode(NMD);
 
   // Check to see if it's already in!
   unsigned &ValueID = ValueMap[V];
@@ -309,6 +336,7 @@ void ValueEnumerator::EnumerateType(const Type *Ty) {
 // walk through it, enumerating the types of the constant.
 void ValueEnumerator::EnumerateOperandType(const Value *V) {
   EnumerateType(V->getType());
+
   if (const Constant *C = dyn_cast<Constant>(V)) {
     // If this constant is already enumerated, ignore it, we know its type must
     // be enumerated.
@@ -382,7 +410,15 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
   // Add all of the instructions.
   for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
     for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
-      if (I->getType() != Type::getVoidTy(F.getContext()))
+      for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+           OI != E; ++OI) {
+        if (MDNode *MD = dyn_cast<MDNode>(*OI))
+          if (!MD->isFunctionLocal())
+            // These were already enumerated during ValueEnumerator creation.
+            continue;
+        EnumerateOperandType(*OI);
+      }
+      if (!I->getType()->isVoidTy())
         EnumerateValue(I);
     }
   }
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 3c83e35..c50fe9c 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -27,9 +27,11 @@ class BasicBlock;
 class Function;
 class Module;
 class MetadataBase;
+class NamedMDNode;
 class AttrListPtr;
 class TypeSymbolTable;
 class ValueSymbolTable;
+class MDSymbolTable;
 
 class ValueEnumerator {
 public:
@@ -126,6 +128,7 @@ private:
   void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
 
   void EnumerateMetadata(const MetadataBase *MD);
+  void EnumerateNamedMDNode(const NamedMDNode *NMD);
   void EnumerateValue(const Value *V);
   void EnumerateType(const Type *T);
   void EnumerateOperandType(const Value *V);
@@ -133,6 +136,7 @@ private:
 
   void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
   void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+  void EnumerateMDSymbolTable(const MDSymbolTable &ST);
 };
 
 } // End llvm namespace
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 761fbc6..ca1f4a3 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,4 +1,4 @@
-//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===//
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -77,18 +77,18 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
   assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
   assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
-  
+
   // find group for each register
   unsigned Group1 = GetGroup(Reg1);
   unsigned Group2 = GetGroup(Reg2);
-  
+
   // if either group is 0, then that must become the parent
   unsigned Parent = (Group1 == 0) ? Group1 : Group2;
   unsigned Other = (Parent == Group1) ? Group2 : Group1;
   GroupNodes.at(Other) = Parent;
   return Parent;
 }
-  
+
 unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
 {
   // Create a new GroupNode for Reg. Reg's existing GroupNode must
@@ -111,7 +111,7 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
 
 AggressiveAntiDepBreaker::
 AggressiveAntiDepBreaker(MachineFunction& MFi,
-                         TargetSubtarget::RegClassVector& CriticalPathRCs) : 
+                         TargetSubtarget::RegClassVector& CriticalPathRCs) :
   AntiDepBreaker(), MF(MFi),
   MRI(MF.getRegInfo()),
   TRI(MF.getTarget().getRegisterInfo()),
@@ -126,9 +126,9 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
     else
       CriticalPathSet |= CPSet;
   }
-  
+
   DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
-  DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; 
+  DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
              r = CriticalPathSet.find_next(r))
           dbgs() << " " << TRI->getName(r));
   DEBUG(dbgs() << '\n');
@@ -232,10 +232,11 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
     // schedule region).
     if (State->IsLive(Reg)) {
       DEBUG(if (State->GetGroup(Reg) != 0)
-              dbgs() << " " << TRI->getName(Reg) << "=g" << 
+              dbgs() << " " << TRI->getName(Reg) << "=g" <<
                 State->GetGroup(Reg) << "->g0(region live-out)");
       State->UnionGroups(Reg, 0);
-    } else if ((DefIndices[Reg] < InsertPosIndex) && (DefIndices[Reg] >= Count)) {
+    } else if ((DefIndices[Reg] < InsertPosIndex)
+               && (DefIndices[Reg] >= Count)) {
       DefIndices[Reg] = Count;
     }
   }
@@ -266,7 +267,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg()) continue;
-    if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || 
+    if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
         IsImplicitDefUse(MI, MO)) {
       const unsigned Reg = MO.getReg();
       PassthruRegs.insert(Reg);
@@ -320,11 +321,12 @@ static SUnit *CriticalPathStep(SUnit *SU) {
 }
 
 void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
-                                             const char *tag, const char *header,
+                                             const char *tag,
+                                             const char *header,
                                              const char *footer) {
   unsigned *KillIndices = State->GetKillIndices();
   unsigned *DefIndices = State->GetDefIndices();
-  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
 
   if (!State->IsLive(Reg)) {
@@ -355,10 +357,12 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
   DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
 }
 
-void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count,
-                                                  std::set<unsigned>& PassthruRegs) {
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+                                                  unsigned Count,
+                                                  std::set<unsigned>& PassthruRegs)
+{
   unsigned *DefIndices = State->GetDefIndices();
-  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
    RegRefs = State->GetRegRefs();
 
   // Handle dead defs by simulating a last-use of the register just
@@ -371,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
     if (!MO.isReg() || !MO.isDef()) continue;
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
-    
+
     HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
   }
 
@@ -382,7 +386,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
 
-    DEBUG(dbgs() << " " << TRI->getName(Reg)
<< "=g" << State->GetGroup(Reg)); + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers @@ -398,11 +402,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou unsigned AliasReg = *Alias; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); - DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << TRI->getName(AliasReg) << ")"); } } - + // Note register reference... const TargetRegisterClass *RC = NULL; if (i < MI->getDesc().getNumOperands()) @@ -438,7 +442,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned Count) { DEBUG(dbgs() << "\tUse Groups:"); - std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); // Scan the register uses for this instruction and update @@ -448,9 +452,9 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - - DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << - State->GetGroup(Reg)); + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg)); // It wasn't previously live but now it is, this is a kill. Forget // the previous live-range information and start a new live-range @@ -472,7 +476,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } - + DEBUG(dbgs() << '\n'); // Form a group of all defs and uses of a KILL instruction to ensure @@ -486,7 +490,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - + if (FirstReg != 0) { DEBUG(dbgs() << "=" << TRI->getName(Reg)); State->UnionGroups(FirstReg, Reg); @@ -495,7 +499,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, FirstReg = Reg; } } - + DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); } } @@ -507,13 +511,14 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { // Check all references that need rewriting for Reg. For each, use // the corresponding register class to narrow the set of registers // that are appropriate for renaming. 
- std::pair<std::multimap<unsigned, + std::pair<std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator, std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator> Range = State->GetRegRefs().equal_range(Reg); - for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) { + for (std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, + QE = Range.second; Q != QE; ++Q) { const TargetRegisterClass *RC = Q->second.RC; if (RC == NULL) continue; @@ -527,9 +532,9 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { DEBUG(dbgs() << " " << RC->getName()); } - + return BV; -} +} bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned AntiDepGroupIndex, @@ -537,7 +542,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( std::map<unsigned, unsigned> &RenameMap) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); - std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); // Collect all referenced registers in the same group as @@ -552,7 +557,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // Find the "superest" register in the group. At the same time, // collect the BitVector of registers that can be used to rename // each register. - DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex << ":\n"); + DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex + << ":\n"); std::map<unsigned, BitVector> RenameRegisterMap; unsigned SuperReg = 0; for (unsigned i = 0, e = Regs.size(); i != e; ++i) { @@ -563,7 +569,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // If Reg has any references, then collect possible rename regs if (RegRefs.count(Reg) > 0) { DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); - + BitVector BV = GetRenameRegisters(Reg); RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV)); @@ -590,7 +596,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( static int renamecnt = 0; if (renamecnt++ % DebugDiv != DebugMod) return false; - + dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << " for debug ***\n"; } @@ -600,9 +606,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // order. If that register is available, and the corresponding // registers are available for the other group subregisters, then we // can use those registers to rename. - const TargetRegisterClass *SuperRC = + const TargetRegisterClass *SuperRC = TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other); - + const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); if (RB == RE) { @@ -624,7 +630,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( const unsigned NewSuperReg = *R; // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; - + DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); RenameMap.clear(); @@ -643,7 +649,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } DEBUG(dbgs() << " " << TRI->getName(NewReg)); - + // Check if Reg can be renamed to NewReg. 
BitVector BV = RenameRegisterMap[Reg]; if (!BV.test(NewReg)) { @@ -663,7 +669,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( for (const unsigned *Alias = TRI->getAliasSet(NewReg); *Alias; ++Alias) { unsigned AliasReg = *Alias; - if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) { + if (State->IsLive(AliasReg) || + (KillIndices[Reg] > DefIndices[AliasReg])) { DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); found = true; break; @@ -672,11 +679,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( if (found) goto next_super_reg; } - + // Record that 'Reg' can be renamed to 'NewReg'. RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg)); } - + // If we fall-out here, then every register in the group can be // renamed, as recorded in RenameMap. RenameOrder.erase(SuperRC); @@ -704,13 +711,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( unsigned InsertPosIndex) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); - std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; - + // For each regclass the next register to use for renaming. RenameOrderType RenameOrder; @@ -729,17 +736,17 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (CriticalPathSet.any()) { for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; - if (!CriticalPathSU || - ((SU->getDepth() + SU->Latency) > + if (!CriticalPathSU || + ((SU->getDepth() + SU->Latency) > (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { CriticalPathSU = SU; } } - + CriticalPathMI = CriticalPathSU->getInstr(); } -#ifndef NDEBUG +#ifndef NDEBUG DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { @@ -766,7 +773,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Process the defs in MI... PrescanInstruction(MI, Count, PassthruRegs); - + // The dependence edges that represent anti- and output- // dependencies that are candidates for breaking. std::vector<SDep*> Edges; @@ -779,7 +786,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; - } else { + } else { ExcludeRegs = &CriticalPathSet; } @@ -790,14 +797,14 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( for (unsigned i = 0, e = Edges.size(); i != e; ++i) { SDep *Edge = Edges[i]; SUnit *NextSU = Edge->getSUnit(); - + if ((Edge->getKind() != SDep::Anti) && (Edge->getKind() != SDep::Output)) continue; - + unsigned AntiDepReg = Edge->getReg(); DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - + if (!AllocatableSet.test(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. 
DEBUG(dbgs() << " (non-allocatable)\n"); @@ -816,12 +823,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( } else { // No anti-dep breaking for implicit deps MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); - assert(AntiDepOp != NULL && "Can't find index for defined register operand"); + assert(AntiDepOp != NULL && + "Can't find index for defined register operand"); if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) { DEBUG(dbgs() << " (implicit)\n"); continue; } - + // If the SUnit has other dependencies on the SUnit that // it anti-depends on, don't bother breaking the // anti-dependency since those edges would prevent such @@ -847,58 +855,59 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << " (real dependency)\n"); AntiDepReg = 0; break; - } else if ((P->getSUnit() != NextSU) && - (P->getKind() == SDep::Data) && + } else if ((P->getSUnit() != NextSU) && + (P->getKind() == SDep::Data) && (P->getReg() == AntiDepReg)) { DEBUG(dbgs() << " (other dependency)\n"); AntiDepReg = 0; break; } } - + if (AntiDepReg == 0) continue; } - + assert(AntiDepReg != 0); if (AntiDepReg == 0) continue; - + // Determine AntiDepReg's register group. const unsigned GroupIndex = State->GetGroup(AntiDepReg); if (GroupIndex == 0) { DEBUG(dbgs() << " (zero group)\n"); continue; } - + DEBUG(dbgs() << '\n'); - + // Look for a suitable register to use to break the anti-dependence. std::map<unsigned, unsigned> RenameMap; if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { DEBUG(dbgs() << "\tBreaking anti-dependence edge on " << TRI->getName(AntiDepReg) << ":"); - + // Handle each group register... for (std::map<unsigned, unsigned>::iterator S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) { unsigned CurrReg = S->first; unsigned NewReg = S->second; - - DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << - TRI->getName(NewReg) << "(" << + + DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << + TRI->getName(NewReg) << "(" << RegRefs.count(CurrReg) << " refs)"); - + // Update the references to the old register CurrReg to // refer to the new register NewReg. - std::pair<std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator, + std::pair<std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator, std::multimap<unsigned, - AggressiveAntiDepState::RegisterReference>::iterator> + AggressiveAntiDepState::RegisterReference>::iterator> Range = RegRefs.equal_range(CurrReg); - for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator + for (std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second.Operand->setReg(NewReg); } - + // We just went back in time and modified history; the // liveness information for CurrReg is now inconsistent. Set // the state as if it were dead. 
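A note for readers tracking the register-grouping logic that these AggressiveAntiDepBreaker hunks manipulate: GroupNodeIndices maps each register to a node in GroupNodes, parent links lead to the group's representative, and group 0 collects registers that must never be renamed. The following is a minimal standalone sketch of those assumed semantics, not the LLVM class itself (which lives in AggressiveAntiDepBreaker.h below):

#include <vector>

// Illustrative union-find mirroring the assumed semantics of
// AggressiveAntiDepState's GroupNodes/GroupNodeIndices.
struct RegGroups {
  std::vector<unsigned> GroupNodes;       // parent links; a root points to itself
  std::vector<unsigned> GroupNodeIndices; // per-register entry node

  explicit RegGroups(unsigned NumRegs)
      : GroupNodes(NumRegs), GroupNodeIndices(NumRegs) {
    // Every register starts in its own group; node 0 doubles as the
    // "never rename" group that register 0 always belongs to.
    for (unsigned Reg = 0; Reg != NumRegs; ++Reg) {
      GroupNodes[Reg] = Reg;
      GroupNodeIndices[Reg] = Reg;
    }
  }

  // GetGroup - follow parent links to the representative node.
  unsigned GetGroup(unsigned Reg) const {
    unsigned Node = GroupNodeIndices[Reg];
    while (GroupNodes[Node] != Node)
      Node = GroupNodes[Node];
    return Node;
  }

  // UnionGroups - merge two groups; group 0 always becomes the parent, so
  // joining a register with group 0 permanently pins it as unrenamable.
  unsigned UnionGroups(unsigned Reg1, unsigned Reg2) {
    unsigned Group1 = GetGroup(Reg1), Group2 = GetGroup(Reg2);
    if (Group1 == Group2)
      return Group1;
    unsigned Parent = (Group1 == 0) ? Group1 : Group2;
    unsigned Other = (Parent == Group1) ? Group2 : Group1;
    GroupNodes[Other] = Parent;
    return Parent;
  }
};

Read this way, State->UnionGroups(Reg, 0) in the hunks above pins Reg into the unrenamable group, and a zero result from GetGroup is why such registers are skipped as rename candidates.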
@@ -906,7 +915,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( RegRefs.erase(NewReg); DefIndices[NewReg] = DefIndices[CurrReg]; KillIndices[NewReg] = KillIndices[CurrReg]; - + State->UnionGroups(CurrReg, 0); RegRefs.erase(CurrReg); DefIndices[CurrReg] = KillIndices[CurrReg]; @@ -915,7 +924,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( (DefIndices[CurrReg] == ~0u)) && "Kill and Def maps aren't consistent for AntiDepReg!"); } - + ++Broken; DEBUG(dbgs() << '\n'); } @@ -924,6 +933,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( ScanInstruction(MI, Count); } - + return Broken; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index d385a21..a62d68c 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -30,7 +30,7 @@ #include <map> namespace llvm { - /// Class AggressiveAntiDepState + /// Class AggressiveAntiDepState /// Contains all the state necessary for anti-dep breaking. class AggressiveAntiDepState { public: @@ -54,27 +54,27 @@ namespace llvm { /// is the parent of a group, or point to another node to indicate /// that it is a member of the same group as that node. std::vector<unsigned> GroupNodes; - + /// GroupNodeIndices - For each register, the index of the GroupNode /// currently representing the group that the register belongs to. /// Register 0 is always represented by the 0 group, a group /// composed of registers that are not eligible for anti-aliasing. unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister]; - + /// RegRefs - Map registers to all their references within a live range. std::multimap<unsigned, RegisterReference> RegRefs; - + /// KillIndices - The index of the most recent kill (preceding bottom-up), /// or ~0u if the register is not live. unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; - + /// DefIndices - The index of the most recent complete def (preceding bottom /// up), or ~0u if the register is live. unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; public: AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB); - + /// GetKillIndices - Return the kill indices. unsigned *GetKillIndices() { return KillIndices; } @@ -87,13 +87,14 @@ namespace llvm { // GetGroup - Get the group for a register. The returned value is // the index of the GroupNode representing the group. unsigned GetGroup(unsigned Reg); - + // GetGroupRegs - Return a vector of the registers belonging to a // group. If RegRefs is non-NULL then only include referenced registers. void GetGroupRegs( unsigned Group, std::vector<unsigned> &Regs, - std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs); + std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference> *RegRefs); // UnionGroups - Union Reg1's and Reg2's groups to form a new // group. Return the index of the GroupNode representing the @@ -110,7 +111,7 @@ namespace llvm { }; - /// Class AggressiveAntiDepBreaker + /// Class AggressiveAntiDepBreaker class AggressiveAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; @@ -130,14 +131,15 @@ namespace llvm { AggressiveAntiDepState *State; public: - AggressiveAntiDepBreaker(MachineFunction& MFi, + AggressiveAntiDepBreaker(MachineFunction& MFi, TargetSubtarget::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); - + /// Start - Initialize anti-dep breaking for a new basic block. 
void StartBlock(MachineBasicBlock *BB); - /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path + /// BreakAntiDependencies - Identify anti-dependencies along the critical + /// path /// of the ScheduleDAG and break them by renaming registers. /// unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, @@ -160,7 +162,7 @@ namespace llvm { /// IsImplicitDefUse - Return true if MO represents a register /// that is both implicitly used and defined in MI bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO); - + /// GetPassthruRegs - If MI implicitly def/uses a register, then /// return that register and all subregisters. void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6b24e24..876f628 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -807,124 +807,145 @@ void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const { // Print out the specified constant, without a storage class. Only the // constants valid in constant expressions can occur here. void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { - if (CV->isNullValue() || isa<UndefValue>(CV)) + if (CV->isNullValue() || isa<UndefValue>(CV)) { O << '0'; - else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + return; + } + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { O << CI->getZExtValue(); - } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + return; + } + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { // This is a constant address for a global variable or function. Use the // name of the variable or function as the address value. O << Mang->getMangledName(GV); - } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + return; + } + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) { + GetBlockAddressSymbol(BA)->print(O, MAI); + return; + } + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + if (CE == 0) { + llvm_unreachable("Unknown constant value!"); + O << '0'; + return; + } + + switch (CE->getOpcode()) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + default: + llvm_unreachable("FIXME: Don't support this constant cast expr"); + case Instruction::GetElementPtr: { + // generate a symbolic expression for the byte address const TargetData *TD = TM.getTargetData(); - unsigned Opcode = CE->getOpcode(); - switch (Opcode) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - llvm_unreachable("FIXME: Don't support this constant cast expr"); - case Instruction::GetElementPtr: { - // generate a symbolic expression for the byte address - const Constant *ptrVal = CE->getOperand(0); - SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); - if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], - idxVec.size())) { - // Truncate/sext the offset to the pointer size. 
- if (TD->getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD->getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } - - if (Offset) - O << '('; - EmitConstantValueOnly(ptrVal); - if (Offset > 0) - O << ") + " << Offset; - else if (Offset < 0) - O << ") - " << -Offset; - } else { - EmitConstantValueOnly(ptrVal); - } - break; + const Constant *ptrVal = CE->getOperand(0); + SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], + idxVec.size()); + if (Offset == 0) + return EmitConstantValueOnly(ptrVal); + + // Truncate/sext the offset to the pointer size. + if (TD->getPointerSizeInBits() != 64) { + int SExtAmount = 64-TD->getPointerSizeInBits(); + Offset = (Offset << SExtAmount) >> SExtAmount; } - case Instruction::BitCast: - return EmitConstantValueOnly(CE->getOperand(0)); - - case Instruction::IntToPtr: { - // Handle casts to pointers by changing them into casts to the appropriate - // integer type. This promotes constant folding and simplifies this code. - Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), - false/*ZExt*/); + + if (Offset) + O << '('; + EmitConstantValueOnly(ptrVal); + if (Offset > 0) + O << ") + " << Offset; + else + O << ") - " << -Offset; + return; + } + case Instruction::BitCast: + return EmitConstantValueOnly(CE->getOperand(0)); + + case Instruction::IntToPtr: { + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + const TargetData *TD = TM.getTargetData(); + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); + return EmitConstantValueOnly(Op); + } + + case Instruction::PtrToInt: { + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + const TargetData *TD = TM.getTargetData(); + + // We can emit the pointer value into this slot if the slot is an + // integer slot greater or equal to the size of the pointer. + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) return EmitConstantValueOnly(Op); - } - + + O << "(("; + EmitConstantValueOnly(Op); + APInt ptrMask = + APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType())); + + SmallString<40> S; + ptrMask.toStringUnsigned(S); + O << ") & " << S.str() << ')'; + return; + } - case Instruction::PtrToInt: { - // Support only foldable casts to/from pointers that can be eliminated by - // changing the pointer to the appropriately sized integer type. - Constant *Op = CE->getOperand(0); - const Type *Ty = CE->getType(); - - // We can emit the pointer value into this slot if the slot is an - // integer slot greater or equal to the size of the pointer. - if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) - return EmitConstantValueOnly(Op); - - O << "(("; - EmitConstantValueOnly(Op); - APInt ptrMask = - APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType())); + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. 
+ return EmitConstantValueOnly(CE->getOperand(0)); - SmallString<40> S; - ptrMask.toStringUnsigned(S); - O << ") & " << S.str() << ')'; - break; - } + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + O << '('; + EmitConstantValueOnly(CE->getOperand(0)); + O << ')'; + switch (CE->getOpcode()) { case Instruction::Add: + O << " + "; + break; case Instruction::Sub: + O << " - "; + break; case Instruction::And: + O << " & "; + break; case Instruction::Or: + O << " | "; + break; case Instruction::Xor: - O << '('; - EmitConstantValueOnly(CE->getOperand(0)); - O << ')'; - switch (Opcode) { - case Instruction::Add: - O << " + "; - break; - case Instruction::Sub: - O << " - "; - break; - case Instruction::And: - O << " & "; - break; - case Instruction::Or: - O << " | "; - break; - case Instruction::Xor: - O << " ^ "; - break; - default: - break; - } - O << '('; - EmitConstantValueOnly(CE->getOperand(1)); - O << ')'; - break; + O << " ^ "; + break; default: - llvm_unreachable("Unsupported operator!"); + break; } - } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) { - GetBlockAddressSymbol(BA)->print(O, MAI); - } else { - llvm_unreachable("Unknown constant value!"); + O << '('; + EmitConstantValueOnly(CE->getOperand(1)); + O << ')'; + break; } } @@ -1225,8 +1246,7 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, unsigned AddrSpace) { const TargetData *TD = TM.getTargetData(); unsigned BitWidth = CI->getBitWidth(); - assert(isPowerOf2_32(BitWidth) && - "Non-power-of-2-sized integers not handled!"); + assert((BitWidth & 63) == 0 && "only support multiples of 64-bits"); // We don't expect assemblers to support integer data directives // for more than 64 bits, so we emit the data in at most 64-bit @@ -1239,39 +1259,34 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, else Val = RawData[i]; - if (MAI->getData64bitsDirective(AddrSpace)) + if (MAI->getData64bitsDirective(AddrSpace)) { O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n'; - else if (TD->isBigEndian()) { - O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) { - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() - << " most significant half of i64 " << Val; - } - O << '\n'; - O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) { - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() - << " least significant half of i64 " << Val; - } - O << '\n'; - } else { - O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) { - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() - << " least significant half of i64 " << Val; - } - O << '\n'; - O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) { - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() - << " most significant half of i64 " << Val; - } - O << '\n'; + continue; } + + // Emit two 32-bit chunks, order depends on endianness. 
+ unsigned FirstChunk = unsigned(Val), SecondChunk = unsigned(Val >> 32); + const char *FirstName = " least", *SecondName = " most"; + if (TD->isBigEndian()) { + std::swap(FirstChunk, SecondChunk); + std::swap(FirstName, SecondName); + } + + O << MAI->getData32bitsDirective(AddrSpace) << FirstChunk; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << FirstName << " significant half of i64 " << Val; + } + O << '\n'; + + O << MAI->getData32bitsDirective(AddrSpace) << SecondChunk; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << SecondName << " significant half of i64 " << Val; + } + O << '\n'; } } @@ -1284,22 +1299,39 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (CV->isNullValue() || isa<UndefValue>(CV)) { EmitZeros(Size, AddrSpace); return; - } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { + } + + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { EmitGlobalConstantArray(CVA , AddrSpace); return; - } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { + } + + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { EmitGlobalConstantStruct(CVS, AddrSpace); return; - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + } + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { EmitGlobalConstantFP(CFP, AddrSpace); return; - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + } + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + // If we can directly emit an 8-byte constant, do it. + if (Size == 8) + if (const char *Data64Dir = MAI->getData64bitsDirective(AddrSpace)) { + O << Data64Dir << CI->getZExtValue() << '\n'; + return; + } + // Small integers are handled below; large integers are handled here. if (Size > 4) { EmitGlobalConstantLargeInt(CI, AddrSpace); return; } - } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + } + + if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { EmitGlobalConstantVector(CP); return; } @@ -1617,7 +1649,7 @@ void AsmPrinter::printLabel(unsigned Id) const { /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM /// instruction, using the specified assembler variant. Targets should -/// overried this to format as appropriate. +/// override this to format as appropriate. bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode) { // Target doesn't support this yet! @@ -1645,15 +1677,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, // This code must use the function name itself, and not the function number, // since it must be possible to generate the label name from within other // functions. - std::string FuncName = Mang->getMangledName(F); + SmallString<60> FnName; + Mang->getNameWithPrefix(FnName, F, false); - SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA" - << FuncName.size() << '_' << FuncName << '_' - << Mang->makeNameProper(BB->getName()) - << Suffix; + // FIXME: THIS IS BROKEN IF THE LLVM BASIC BLOCK DOESN'T HAVE A NAME! 
+ SmallString<60> NameResult; + Mang->getNameWithPrefix(NameResult, + StringRef("BA") + Twine((unsigned)FnName.size()) + + "_" + FnName.str() + "_" + BB->getName() + Suffix, + Mangler::Private); - return OutContext.GetOrCreateSymbol(Name.str()); + return OutContext.GetOrCreateSymbol(NameResult.str()); } MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 8a3ceb6..15f37ae 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -212,19 +212,30 @@ public: /// void addVariable(DbgVariable *V) { Variables.push_back(V); } - void fixInstructionMarkers() { + void fixInstructionMarkers(DenseMap<const MachineInstr *, + unsigned> &MIIndexMap) { assert (getFirstInsn() && "First instruction is missing!"); - if (getLastInsn()) - return; - - // If a scope does not have an instruction to mark an end then use - // the end of last child scope. + + // Use the end of last child scope as end of this scope. SmallVector<DbgScope *, 4> &Scopes = getScopes(); - assert (!Scopes.empty() && "Inner most scope does not have last insn!"); - DbgScope *L = Scopes.back(); - if (!L->getLastInsn()) - L->fixInstructionMarkers(); - setLastInsn(L->getLastInsn()); + const MachineInstr *LastInsn = getFirstInsn(); + unsigned LIndex = 0; + if (Scopes.empty()) { + assert (getLastInsn() && "Inner most scope does not have last insn!"); + return; + } + for (SmallVector<DbgScope *, 4>::iterator SI = Scopes.begin(), + SE = Scopes.end(); SI != SE; ++SI) { + DbgScope *DS = *SI; + DS->fixInstructionMarkers(MIIndexMap); + const MachineInstr *DSLastInsn = DS->getLastInsn(); + unsigned DSI = MIIndexMap[DSLastInsn]; + if (DSI > LIndex) { + LastInsn = DSLastInsn; + LIndex = DSI; + } + } + setLastInsn(LastInsn); } #ifndef NDEBUG @@ -1021,6 +1032,16 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) { return Enumerator; } +/// getRealLinkageName - If the name starts with the special LLVM prefix that +/// tells the asm printer not to emit the usual symbol prefix, return the +/// linkage name with that prefix skipped. +static StringRef getRealLinkageName(StringRef LinkageName) { + char One = '\1'; + if (LinkageName.startswith(StringRef(&One, 1))) + return LinkageName.substr(1); + return LinkageName; +} + /// createGlobalVariableDIE - Create new DIE using GV. DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) { // If the global variable was optimized out then no need to create debug info @@ -1033,16 +1054,10 @@ DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) { GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) { - // Skip special LLVM prefix that is used to inform the asm printer to not - // emit usual symbol prefix before the symbol name. This happens for - // Objective-C symbol names and symbol whose name is replaced using GCC's - // __asm__ attribute. 
- if (LinkageName[0] == 1) - LinkageName = LinkageName.substr(1); + if (!LinkageName.empty()) addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - LinkageName); - } + getRealLinkageName(LinkageName)); + addType(GVDie, GV.getType()); if (!GV.isLocalToUnit()) addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); @@ -1074,10 +1089,9 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); uint64_t Offset = DT.getOffsetInBits(); - uint64_t FieldOffset = Offset; uint64_t AlignMask = ~(DT.getAlignInBits() - 1); uint64_t HiMark = (Offset + FieldSize) & AlignMask; - FieldOffset = (HiMark - FieldSize); + uint64_t FieldOffset = (HiMark - FieldSize); Offset -= FieldOffset; // Maybe we need to work from the other end. @@ -1119,16 +1133,10 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) { - // Skip special LLVM prefix that is used to inform the asm printer to not - // emit usual symbol prefix before the symbol name. This happens for - // Objective-C symbol names and symbol whose name is replaced using GCC's - // __asm__ attribute. - if (LinkageName[0] == 1) - LinkageName = LinkageName.substr(1); + if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - LinkageName); - } + getRealLinkageName(LinkageName)); + addSourceLine(SPDie, &SP); // Add prototyped tag, if C or ObjC. @@ -1382,7 +1390,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { I->second.push_back(std::make_pair(StartID, ScopeDIE)); StringPool.insert(InlinedSP.getName()); - StringPool.insert(InlinedSP.getLinkageName()); + StringPool.insert(getRealLinkageName(InlinedSP.getLinkageName())); + DILocation DL(Scope->getInlinedAt()); addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); @@ -1644,8 +1653,11 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { ModuleCU->insertDIE(N, VariableDie); // Add to context owner. - if (DI_GV.isDefinition() - && !DI_GV.getContext().isCompileUnit()) { + DIDescriptor GVContext = DI_GV.getContext(); + // Do not create specification DIE if context is either compile unit + // or a subprogram. + if (DI_GV.isDefinition() && !GVContext.isCompileUnit() + && !GVContext.isSubprogram()) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, @@ -1663,7 +1675,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { Asm->Mang->getMangledName(DI_GV.getGlobal())); addBlock(VariableDie, dwarf::DW_AT_location, 0, Block); } - addToContextOwner(VariableDie, DI_GV.getContext()); + addToContextOwner(VariableDie, GVContext); // Expose as global. FIXME - need to check external flag. ModuleCU->addGlobal(DI_GV.getName(), VariableDie); @@ -1804,7 +1816,8 @@ void DwarfDebug::endModule() { DIE *NDie = ModuleCU->getDIE(N); if (!NDie) continue; addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); - addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + // FIXME - This is not the correct approach. + // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); } // Standard sections final addresses. 
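The createMemberDIE hunk above compresses the bit-field placement math into a single FieldOffset expression. A standalone worked version of the same arithmetic may help; the values below are illustrative, and FieldSize stands in for the bit size of the member's underlying storage type, which the surrounding code obtains from the type entry:

#include <cstdint>
#include <cstdio>

// Same arithmetic as createMemberDIE: align the bit just past the field
// down to the alignment boundary, then step back one field size to find
// the start of the storage unit holding the bit-field.
static void splitBitFieldOffset(uint64_t Offset, uint64_t FieldSize,
                                uint64_t AlignInBits) {
  uint64_t AlignMask = ~(AlignInBits - 1);
  uint64_t HiMark = (Offset + FieldSize) & AlignMask;
  uint64_t FieldOffset = HiMark - FieldSize; // storage unit start, in bits
  uint64_t BitOffset = Offset - FieldOffset; // offset within that unit
  std::printf("unit at bit %llu, field %llu bits into it\n",
              (unsigned long long)FieldOffset,
              (unsigned long long)BitOffset);
}

int main() {
  // A field at bit offset 35 in a 32-bit-aligned, 32-bit storage unit:
  // HiMark = (35 + 32) & ~31 = 64, so the unit starts at bit 32 and the
  // field sits 3 bits into it.
  splitBitFieldOffset(35, 32, 32);
  return 0;
}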
@@ -1976,12 +1989,15 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) { if (!DbgScopeMap.empty()) return false; + DenseMap<const MachineInstr *, unsigned> MIIndexMap; + unsigned MIIndex = 0; // Scan each instruction and create scopes. First build working set of scopes. for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; + MIIndexMap[MInsn] = MIIndex++; DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) continue; DebugLocTuple DLT = MF->getDebugLocTuple(DL); @@ -2014,16 +2030,10 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) { } } - // If a scope's last instruction is not set then use its child scope's - // last instruction as this scope's last instrunction. - for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), - DE = DbgScopeMap.end(); DI != DE; ++DI) { - if (DI->second->isAbstractScope()) - continue; - assert (DI->second->getFirstInsn() && "Invalid first instruction!"); - DI->second->fixInstructionMarkers(); - assert (DI->second->getLastInsn() && "Invalid last instruction!"); - } + if (!CurrentFnDbgScope) + return false; + + CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap); // Each scope has first instruction and last instruction to mark beginning // and end of a scope respectively. Create an inverse map that list scopes @@ -2105,38 +2115,41 @@ void DwarfDebug::endFunction(MachineFunction *MF) { if (DbgScopeMap.empty()) return; - // Define end label for subprogram. - EmitLabel("func_end", SubprogramCount); - - // Get function line info. - if (!Lines.empty()) { - // Get section line info. - unsigned ID = SectionMap.insert(Asm->getCurrentSection()); - if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); - std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; - // Append the function info to section info. - SectionLineInfos.insert(SectionLineInfos.end(), - Lines.begin(), Lines.end()); + if (CurrentFnDbgScope) { + // Define end label for subprogram. + EmitLabel("func_end", SubprogramCount); + + // Get function line info. + if (!Lines.empty()) { + // Get section line info. + unsigned ID = SectionMap.insert(Asm->getCurrentSection()); + if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); + std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; + // Append the function info to section info. + SectionLineInfos.insert(SectionLineInfos.end(), + Lines.begin(), Lines.end()); + } + + // Construct abstract scopes. + for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), + AE = AbstractScopesList.end(); AI != AE; ++AI) + constructScopeDIE(*AI); + + constructScopeDIE(CurrentFnDbgScope); + + DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, + MMI->getFrameMoves())); } - // Construct abstract scopes. 
- for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), - AE = AbstractScopesList.end(); AI != AE; ++AI) - constructScopeDIE(*AI); - - constructScopeDIE(CurrentFnDbgScope); - - DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, - MMI->getFrameMoves())); - // Clear debug info - CurrentFnDbgScope = NULL; - DbgScopeMap.clear(); - DbgScopeBeginMap.clear(); - DbgScopeEndMap.clear(); - ConcreteScopes.clear(); - AbstractScopesList.clear(); - + if (CurrentFnDbgScope) { + CurrentFnDbgScope = NULL; + DbgScopeMap.clear(); + DbgScopeBeginMap.clear(); + DbgScopeEndMap.clear(); + ConcreteScopes.clear(); + AbstractScopesList.clear(); + } Lines.clear(); if (TimePassesIsEnabled) @@ -2908,8 +2921,6 @@ void DwarfDebug::emitDebugInlineInfo() { for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { -// for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator - // I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { MDNode *Node = *I; ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node); @@ -2920,20 +2931,11 @@ void DwarfDebug::emitDebugInlineInfo() { if (LName.empty()) Asm->EmitString(Name); - else { - // Skip special LLVM prefix that is used to inform the asm printer to not - // emit usual symbol prefix before the symbol name. This happens for - // Objective-C symbol names and symbol whose name is replaced using GCC's - // __asm__ attribute. - if (LName[0] == 1) - LName = LName.substr(1); -// Asm->EmitString(LName); + else EmitSectionOffset("string", "section_str", - StringPool.idFor(LName), false, true); + StringPool.idFor(getRealLinkageName(LName)), false, true); - } Asm->EOL("MIPS linkage name"); -// Asm->EmitString(Name); EmitSectionOffset("string", "section_str", StringPool.idFor(Name), false, true); Asm->EOL("Function name"); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 7a969f0..6bc808c 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -35,12 +35,13 @@ add_llvm_library(LLVMCodeGen MachineModuleInfoImpls.cpp MachinePassRegistry.cpp MachineRegisterInfo.cpp - MachineSink.cpp MachineSSAUpdater.cpp + MachineSink.cpp MachineVerifier.cpp MaxStackAlignment.cpp ObjectCodeEmitter.cpp OcamlGC.cpp + OptimizeExts.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 3c7961c2..056e2d5 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -288,9 +288,11 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, } unsigned -CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg, +CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, + unsigned AntiDepReg, unsigned LastNewReg, - const TargetRegisterClass *RC) { + const TargetRegisterClass *RC) +{ for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), RE = RC->allocation_order_end(MF); R != RE; ++R) { unsigned NewReg = *R; @@ -300,12 +302,16 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg, // an anti-dependence with this AntiDepReg, because that would // re-introduce that anti-dependence. if (NewReg == LastNewReg) continue; + // If the instruction already has a def of the NewReg, it's not suitable. + // For example, an instruction with multiple definitions can result in this + // condition. 
+ if (MI->modifiesRegister(NewReg, TRI)) continue; // If NewReg is dead and NewReg's most recent def is not before // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. - assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && - "Kill and Def maps aren't consistent for AntiDepReg!"); - assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && - "Kill and Def maps aren't consistent for NewReg!"); + assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) + && "Kill and Def maps aren't consistent for AntiDepReg!"); + assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) + && "Kill and Def maps aren't consistent for NewReg!"); if (KillIndices[NewReg] != ~0u || Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) || KillIndices[AntiDepReg] > DefIndices[NewReg]) @@ -336,14 +342,14 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, #ifndef NDEBUG { - DEBUG(errs() << "Critical path has total latency " + DEBUG(dbgs() << "Critical path has total latency " << (Max->getDepth() + Max->Latency) << "\n"); - DEBUG(errs() << "Available regs:"); + DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) - DEBUG(errs() << " " << TRI->getName(Reg)); + DEBUG(dbgs() << " " << TRI->getName(Reg)); } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); } #endif @@ -495,10 +501,10 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { - if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg, + if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg, LastNewReg[AntiDepReg], RC)) { - DEBUG(errs() << "Breaking anti-dependence edge on " + DEBUG(dbgs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" << " using " << TRI->getName(NewReg) << "!\n"); diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 496888d..9e8db02 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -64,11 +64,12 @@ namespace llvm { public: CriticalAntiDepBreaker(MachineFunction& MFi); ~CriticalAntiDepBreaker(); - + /// Start - Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB); - /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path + /// BreakAntiDependencies - Identify anti-dependencies along the critical + /// path /// of the ScheduleDAG and break them by renaming registers. /// unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, @@ -87,7 +88,8 @@ namespace llvm { private: void PrescanInstruction(MachineInstr *MI); void ScanInstruction(MachineInstr *MI, unsigned Count); - unsigned findSuitableFreeRegister(unsigned AntiDepReg, + unsigned findSuitableFreeRegister(MachineInstr *MI, + unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *); }; diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 07a5d38..0982eab 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -109,7 +109,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // If the instruction is dead, delete it! 
if (isDead(MI)) { - DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI); + DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); AnyChanges = true; MI->eraseFromParent(); MIE = MBB->rend(); diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index a6429f7..11a85a0 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -37,7 +37,7 @@ namespace llvm { /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. void ELFCodeEmitter::startFunction(MachineFunction &MF) { - DEBUG(errs() << "processing function: " + DEBUG(dbgs() << "processing function: " << MF.getFunction()->getName() << "\n"); // Get the ELF Section that this function belongs in. diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 3e1ee11..5e5f589 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -1076,7 +1076,7 @@ void ELFWriter::OutputSectionsAndSectionTable() { // Emit all of sections to the file and build the section header table. for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { ELFSection &S = *(*I); - DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() + DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() << ", Size: " << S.Size << ", Offset: " << S.Offset << ", SectionData Size: " << S.size() << "\n"); diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 36925b1..266c74c 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -48,7 +48,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : Scoreboard = new unsigned[ScoreboardDepth]; ScoreboardHead = 0; - DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = " + DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " << ScoreboardDepth << '\n'); } @@ -66,7 +66,7 @@ unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) { } void ExactHazardRecognizer::dumpScoreboard() { - errs() << "Scoreboard:\n"; + dbgs() << "Scoreboard:\n"; unsigned last = ScoreboardDepth - 1; while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0)) @@ -74,10 +74,10 @@ void ExactHazardRecognizer::dumpScoreboard() { for (unsigned i = 0; i <= last; i++) { unsigned FUs = Scoreboard[getFutureIndex(i)]; - errs() << "\t"; + dbgs() << "\t"; for (int j = 31; j >= 0; j--) - errs() << ((FUs & (1 << j)) ? '1' : '0'); - errs() << '\n'; + dbgs() << ((FUs & (1 << j)) ? 
'1' : '0'); + dbgs() << '\n'; } } @@ -102,8 +102,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU unsigned index = getFutureIndex(cycle + i); unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; if (!freeUnits) { - DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", "); - DEBUG(errs() << "SU(" << SU->NodeNum << "): "); + DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); DEBUG(SU->getInstr()->dump()); return Hazard; } diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 4d25dcc..055172b 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -17,6 +17,7 @@ #include "llvm/Pass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -92,7 +93,7 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, } } - errs() << "unsupported GC: " << Name << "\n"; + dbgs() << "unsupported GC: " << Name << "\n"; llvm_unreachable(0); } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 6e0bde6..79b2986 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -109,7 +110,7 @@ GCStrategy::~GCStrategy() { bool GCStrategy::initializeCustomLowering(Module &M) { return false; } bool GCStrategy::performCustomLowering(Function &F) { - errs() << "gc " << getName() << " must override performCustomLowering.\n"; + dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; llvm_unreachable(0); return 0; } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index c23d707..c61fd17 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -229,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); if (!TII) return false; - DEBUG(errs() << "\nIfcvt: function (" << ++FnNum << ") \'" + DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" << MF.getFunction()->getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { - DEBUG(errs() << " skipped\n"); + DEBUG(dbgs() << " skipped\n"); return false; } - DEBUG(errs() << "\n"); + DEBUG(dbgs() << "\n"); MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); @@ -281,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") << "): BB#" << BBI.BB->getNumber() << " (" << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() : BBI.TrueBB->getNumber()) << ") "); RetVal = IfConvertSimple(BBI, Kind); - DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + DEBUG(dbgs() << (RetVal ? "succeeded!" 
: "failed!") << "\n"); if (RetVal) { if (isFalse) NumSimpleFalse++; else NumSimple++; @@ -304,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (DisableTriangleR && !isFalse && isRev) break; if (DisableTriangleF && isFalse && !isRev) break; if (DisableTriangleFR && isFalse && isRev) break; - DEBUG(errs() << "Ifcvt (Triangle"); + DEBUG(dbgs() << "Ifcvt (Triangle"); if (isFalse) - DEBUG(errs() << " false"); + DEBUG(dbgs() << " false"); if (isRev) - DEBUG(errs() << " rev"); - DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:" + DEBUG(dbgs() << " rev"); + DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); - DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { if (isRev) NumTriangleFRev++; @@ -327,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } case ICDiamond: { if (DisableDiamond) break; - DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); - DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) NumDiamonds++; break; } @@ -1141,7 +1141,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, continue; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG - errs() << "Unable to predicate " << *I << "!\n"; + dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } @@ -1177,7 +1177,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, if (!isPredicated) if (!TII->PredicateInstruction(MI, Cond)) { #ifndef NDEBUG - errs() << "Unable to predicate " << *I << "!\n"; + dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 8a3bd0b..9997a48 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -349,12 +349,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::setjmp: { Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), Type::getInt32Ty(Context)); - if (CI->getType() != Type::getVoidTy(Context)) + if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(V); break; } case Intrinsic::sigsetjmp: - if (CI->getType() != Type::getVoidTy(Context)) + if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; @@ -427,10 +427,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } - case Intrinsic::dbg_stoppoint: - case Intrinsic::dbg_region_start: - case Intrinsic::dbg_region_end: - case Intrinsic::dbg_func_start: case Intrinsic::dbg_declare: break; // Simply strip out debugging intrinsics @@ -512,7 +508,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::flt_rounds: // Lower to "round to the nearest" - if (CI->getType() != Type::getVoidTy(Context)) + if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; case Intrinsic::invariant_start: diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index d5fd051..2b5fd2c 100644 --- 
a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; @@ -61,6 +62,7 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -246,7 +248,7 @@ static void printAndVerify(PassManagerBase &PM, const char *Banner, bool allowDoubleDefs = false) { if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(errs(), Banner)); + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) PM.add(createMachineVerifierPass(allowDoubleDefs)); @@ -269,7 +271,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) - PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs())); + PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } // Turn exception handling constructs into something the code generators can @@ -278,8 +280,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, { case ExceptionHandling::SjLj: // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both - PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. PM.add(createSjLjEHPass(getTargetLowering())); + PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); break; case ExceptionHandling::Dwarf: PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); @@ -302,7 +309,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (PrintISelInput) PM.add(createPrintFunctionPass("\n\n" "*** Final LLVM Code input to ISel ***\n", - &errs())); + &dbgs())); // Standard Lower-Level Passes. @@ -323,6 +330,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, /* allowDoubleDefs= */ true); if (OptLevel != CodeGenOpt::None) { + PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); if (!DisableMachineSink) @@ -335,7 +343,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate && PreAllocTailDup) { PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate", + /* allowDoubleDefs= */ true); } // Run pre-ra passes. 
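Much of the mechanical churn in the surrounding files swaps errs() for dbgs(). A small usage sketch of that idiom, with an invented pass name: dbgs() behaves like errs() by default, but it can be redirected into a circular buffer via -debug-buffer-size=N so only the tail of the output survives a crash, and the DEBUG() wrapper still compiles away entirely in NDEBUG builds.

// Illustrative snippet; "example" is a made-up DEBUG_TYPE.
#define DEBUG_TYPE "example"   // gates -debug-only=example
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void noteProgress(unsigned Count) {
  // Emits only in asserts-enabled builds run with -debug (or
  // -debug-only=example), and goes through the managed debug stream
  // instead of straight to stderr.
  DEBUG(dbgs() << "example: processed " << Count << " instructions\n");
}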
@@ -391,7 +400,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createGCMachineCodeAnalysisPass()); if (PrintGCInfo) - PM.add(createGCInfoPrinter(errs())); + PM.add(createGCInfoPrinter(dbgs())); if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { PM.add(createCodePlacementOptPass()); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index cc286aa..e207f60 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -10,7 +10,7 @@ // This file implements the LiveRange and LiveInterval classes. Given some // numbering of each the machine instructions an interval [i, j) is said to be a // live interval for register v if there is no instruction with number j' > j -// such that v is live at j' abd there is no instruction with number i' < i such +// such that v is live at j' and there is no instruction with number i' < i such // that v is live at i'. In this implementation intervals can have holes, // i.e. an interval might look like [1,20), [50,65), [1000,1001). Each // individual range is represented as an instance of LiveRange, and the whole @@ -24,6 +24,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> @@ -813,7 +814,7 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { } void LiveRange::dump() const { - errs() << *this << "\n"; + dbgs() << *this << "\n"; } void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { @@ -872,7 +873,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { } void LiveInterval::dump() const { - errs() << *this << "\n"; + dbgs() << *this << "\n"; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 452f872..e0e2ec8 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -146,7 +146,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { } void LiveIntervals::dumpInstrs() const { - printInstrs(errs()); + printInstrs(dbgs()); } bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, @@ -253,9 +253,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, #ifndef NDEBUG static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { if (TargetRegisterInfo::isPhysicalRegister(reg)) - errs() << tri_->getName(reg); + dbgs() << tri_->getName(reg); else - errs() << "%reg" << reg; + dbgs() << "%reg" << reg; } #endif @@ -266,7 +266,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, unsigned MOIdx, LiveInterval &interval) { DEBUG({ - errs() << "\t\tregister: "; + dbgs() << "\t\tregister: "; printRegName(interval.reg, tri_); }); @@ -314,7 +314,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, "Shouldn't be alive across any blocks!"); LiveRange LR(defIndex, killIdx, ValNo); interval.addRange(LR); - DEBUG(errs() << " +" << LR << "\n"); + DEBUG(dbgs() << " +" << LR << "\n"); ValNo->addKill(killIdx); return; } @@ -325,7 +325,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // live into some number of blocks, but gets killed. Start by adding a // range that goes from this definition to the end of the defining block. 
LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo); - DEBUG(errs() << " +" << NewLR); + DEBUG(dbgs() << " +" << NewLR); interval.addRange(NewLR); // Iterate over all of the blocks that the variable is completely @@ -336,7 +336,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I); LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); interval.addRange(LR); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); } // Finally, this virtual register is live from the start of any killing @@ -348,7 +348,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo); interval.addRange(LR); ValNo->addKill(killIdx); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); } } else { @@ -393,7 +393,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); - DEBUG(errs() << " replace range with " << LR); + DEBUG(dbgs() << " replace range with " << LR); interval.addRange(LR); ValNo->addKill(RedefIndex); @@ -404,8 +404,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, OldValNo)); DEBUG({ - errs() << " RESULT: "; - interval.print(errs(), tri_); + dbgs() << " RESULT: "; + interval.print(dbgs(), tri_); }); } else { // Otherwise, this must be because of phi elimination. If this is the @@ -422,8 +422,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex Start = getMBBStartIdx(Killer->getParent()); SlotIndex End = getInstructionIndex(Killer).getDefIndex(); DEBUG({ - errs() << "\n\t\trenaming [" << Start << "," << End << "] in: "; - interval.print(errs(), tri_); + dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: "; + interval.print(dbgs(), tri_); }); interval.removeRange(Start, End); @@ -442,8 +442,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNI->addKill(indexes_->getTerminatorGap(killMBB)); VNI->setHasPHIKill(true); DEBUG({ - errs() << " RESULT: "; - interval.print(errs(), tri_); + dbgs() << " RESULT: "; + interval.print(dbgs(), tri_); }); } @@ -469,11 +469,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.addRange(LR); ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); } } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); } void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, @@ -485,7 +485,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // A physical register cannot be live across basic block, so its // lifetime must end somewhere in its defining basic block. DEBUG({ - errs() << "\t\tregister: "; + dbgs() << "\t\tregister: "; printRegName(interval.reg, tri_); }); @@ -502,7 +502,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // For earlyclobbers, the defSlot was pushed back one; the extra // advance below compensates. 
if (MO.isDead()) { - DEBUG(errs() << " dead"); + DEBUG(dbgs() << " dead"); end = start.getStoreIndex(); goto exit; } @@ -517,7 +517,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, baseIndex = indexes_->getNextNonNullIndex(baseIndex); if (mi->killsRegister(interval.reg, tri_)) { - DEBUG(errs() << " killed"); + DEBUG(dbgs() << " killed"); end = baseIndex.getDefIndex(); goto exit; } else { @@ -531,7 +531,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DEBUG(errs() << " dead"); + DEBUG(dbgs() << " dead"); end = start.getStoreIndex(); } goto exit; @@ -560,7 +560,7 @@ exit: LiveRange LR(start, end, ValNo); interval.addRange(LR); LR.valno->addKill(end); - DEBUG(errs() << " +" << LR << '\n'); + DEBUG(dbgs() << " +" << LR << '\n'); } void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, @@ -595,7 +595,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex MIIdx, LiveInterval &interval, bool isAlias) { DEBUG({ - errs() << "\t\tlivein register: "; + dbgs() << "\t\tlivein register: "; printRegName(interval.reg, tri_); }); @@ -612,7 +612,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != MBB->end()) { if (mi->killsRegister(interval.reg, tri_)) { - DEBUG(errs() << " killed"); + DEBUG(dbgs() << " killed"); end = baseIndex.getDefIndex(); SeenDefUse = true; break; @@ -621,7 +621,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DEBUG(errs() << " dead"); + DEBUG(dbgs() << " dead"); end = start.getStoreIndex(); SeenDefUse = true; break; @@ -636,10 +636,10 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Live-in register might not be used at all. if (!SeenDefUse) { if (isAlias) { - DEBUG(errs() << " dead"); + DEBUG(dbgs() << " dead"); end = MIIdx.getStoreIndex(); } else { - DEBUG(errs() << " live through"); + DEBUG(dbgs() << " live through"); end = baseIndex; } } @@ -652,7 +652,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, interval.addRange(LR); LR.valno->addKill(end); - DEBUG(errs() << " +" << LR << '\n'); + DEBUG(dbgs() << " +" << LR << '\n'); } /// computeIntervals - computes the live intervals for virtual @@ -660,7 +660,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, /// live interval is an interval [i, j) where 1 <= i <= j < N for /// which a variable is live void LiveIntervals::computeIntervals() { - DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n" + DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); @@ -670,7 +670,7 @@ void LiveIntervals::computeIntervals() { MachineBasicBlock *MBB = MBBI; // Track the index of the current machine instr. SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(errs() << MBB->getName() << ":\n"); + DEBUG(dbgs() << MBB->getName() << ":\n"); MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); @@ -690,7 +690,7 @@ void LiveIntervals::computeIntervals() { MIIndex = indexes_->getNextNonNullIndex(MIIndex); for (; MI != miEnd; ++MI) { - DEBUG(errs() << MIIndex << "\t" << *MI); + DEBUG(dbgs() << MIIndex << "\t" << *MI); // Handle defs. 
for (int i = MI->getNumOperands() - 1; i >= 0; --i) { @@ -1055,7 +1055,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // If this is the rematerializable definition MI itself and // all of its uses are rematerialized, simply delete it. if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(errs() << "\t\t\t\tErasing re-materlizable def: " + DEBUG(dbgs() << "\t\t\t\tErasing re-materlizable def: " << MI << '\n'); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); @@ -1208,28 +1208,28 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (CreatedNewVReg) { LiveRange LR(index.getLoadIndex(), index.getDefIndex(), nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator)); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); nI.addRange(LR); } else { // Extend the split live interval to this def / use. SlotIndex End = index.getDefIndex(); LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, nI.getValNumInfo(nI.getNumValNums()-1)); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); nI.addRange(LR); } } if (HasDef) { LiveRange LR(index.getDefIndex(), index.getStoreIndex(), nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator)); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); nI.addRange(LR); } DEBUG({ - errs() << "\t\t\t\tAdded new interval: "; - nI.print(errs(), tri_); - errs() << '\n'; + dbgs() << "\t\t\t\tAdded new interval: "; + nI.print(dbgs(), tri_); + dbgs() << '\n'; }); } return CanFold; @@ -1557,9 +1557,9 @@ addIntervalsForSpillsFast(const LiveInterval &li, "attempt to spill already spilled interval!"); DEBUG({ - errs() << "\t\t\t\tadding intervals for spills for interval: "; + dbgs() << "\t\t\t\tadding intervals for spills for interval: "; li.dump(); - errs() << '\n'; + dbgs() << '\n'; }); const TargetRegisterClass* rc = mri_->getRegClass(li.reg); @@ -1610,7 +1610,7 @@ addIntervalsForSpillsFast(const LiveInterval &li, LiveRange LR(index.getLoadIndex(), index.getUseIndex(), nI.getNextValue(SlotIndex(), 0, false, getVNInfoAllocator())); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); nI.addRange(LR); vrm.addRestorePoint(NewVReg, MI); } @@ -1618,7 +1618,7 @@ addIntervalsForSpillsFast(const LiveInterval &li, LiveRange LR(index.getDefIndex(), index.getStoreIndex(), nI.getNextValue(SlotIndex(), 0, false, getVNInfoAllocator())); - DEBUG(errs() << " +" << LR); + DEBUG(dbgs() << " +" << LR); nI.addRange(LR); vrm.addSpillPoint(NewVReg, true, MI); } @@ -1626,9 +1626,9 @@ addIntervalsForSpillsFast(const LiveInterval &li, added.push_back(&nI); DEBUG({ - errs() << "\t\t\t\tadded new interval: "; + dbgs() << "\t\t\t\tadded new interval: "; nI.dump(); - errs() << '\n'; + dbgs() << '\n'; }); } @@ -1651,9 +1651,9 @@ addIntervalsForSpills(const LiveInterval &li, "attempt to spill already spilled interval!"); DEBUG({ - errs() << "\t\t\t\tadding intervals for spills for interval: "; - li.print(errs(), tri_); - errs() << '\n'; + dbgs() << "\t\t\t\tadding intervals for spills for interval: "; + li.print(dbgs(), tri_); + dbgs() << '\n'; }); // Each bit specify whether a spill is required in the MBB. 
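
The LiveRange constructions above lean on the sub-slots of a SlotIndex: each instruction index exposes getLoadIndex(), getUseIndex(), getDefIndex() and getStoreIndex(), and a spilled value receives a small interval around each reference. A standalone sketch of the resulting shapes; the numeric spacing is purely illustrative, only the load < use < def < store ordering matters:

#include <cstdio>

int main() {
  // Hypothetical instruction at slot 40, with its four sub-slots mirroring
  // the accessor calls in the hunks above.
  unsigned Load = 40, Use = 41, Def = 42, Store = 43;
  std::printf("restore interval [%u,%u)\n", Load, Use);  // reload feeding a use
  std::printf("spill interval   [%u,%u)\n", Def, Store); // store after a def
  return 0;
}
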
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 3c88e37..b44a220 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -59,17 +60,17 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { } void LiveVariables::VarInfo::dump() const { - errs() << " Alive in blocks: "; + dbgs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) - errs() << *I << ", "; - errs() << "\n Killed by:"; + dbgs() << *I << ", "; + dbgs() << "\n Killed by:"; if (Kills.empty()) - errs() << " No instructions.\n"; + dbgs() << " No instructions.\n"; else { for (unsigned i = 0, e = Kills.size(); i != e; ++i) - errs() << "\n #" << i << ": " << *Kills[i]; - errs() << "\n"; + dbgs() << "\n #" << i << ": " << *Kills[i]; + dbgs() << "\n"; } } @@ -289,7 +290,6 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; - MachineInstr *LastPartDef = 0; unsigned LastPartDefDist = 0; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { @@ -298,13 +298,9 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { // There was a def of this sub-register in between. This is a partial // def, keep track of the last one. unsigned Dist = DistanceMap[Def]; - if (Dist > LastPartDefDist) { + if (Dist > LastPartDefDist) LastPartDefDist = Dist; - LastPartDef = Def; - } - continue; - } - if (MachineInstr *Use = PhysRegUse[SubReg]) { + } else if (MachineInstr *Use = PhysRegUse[SubReg]) { unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { LastRefOrPartRefDist = Dist; diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 80eb6cd..1121d9b 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -122,7 +122,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { "Extract destination must be in a physical register"); assert(SrcReg && "invalid subregister index for register"); - DEBUG(errs() << "subreg: CONVERTING: " << *MI); + DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); if (SrcReg == DstReg) { // No need to insert an identity copy instruction. @@ -131,11 +131,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { // instruction with KILL. 
MI->setDesc(TII->get(TargetInstrInfo::KILL)); MI->RemoveOperand(2); // SubIdx - DEBUG(errs() << "subreg: replace by: " << *MI); + DEBUG(dbgs() << "subreg: replace by: " << *MI); return true; } - DEBUG(errs() << "subreg: eliminated!"); + DEBUG(dbgs() << "subreg: eliminated!"); } else { // Insert copy const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg); @@ -150,11 +150,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { TransferKillFlag(MI, SuperReg, TRI, true); DEBUG({ MachineBasicBlock::iterator dMI = MI; - errs() << "subreg: " << *(--dMI); + dbgs() << "subreg: " << *(--dMI); }); } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); MBB->erase(MI); return true; } @@ -179,7 +179,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DEBUG(errs() << "subreg: CONVERTING: " << *MI); + DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg && InsSIdx == 0) { // No need to insert an identify copy instruction. @@ -188,7 +188,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3 // The first def is defining RAX, not EAX so the top bits were not // zero extended. - DEBUG(errs() << "subreg: eliminated!"); + DEBUG(dbgs() << "subreg: eliminated!"); } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg); @@ -203,11 +203,11 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { TransferKillFlag(MI, InsReg, TRI); DEBUG({ MachineBasicBlock::iterator dMI = MI; - errs() << "subreg: " << *(--dMI); + dbgs() << "subreg: " << *(--dMI); }); } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); MBB->erase(MI); return true; } @@ -235,7 +235,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DEBUG(errs() << "subreg: CONVERTING: " << *MI); + DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg) { // No need to insert an identity copy instruction. If the SrcReg was @@ -248,7 +248,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { else MIB.addReg(InsReg, RegState::Kill); } else { - DEBUG(errs() << "subreg: eliminated!\n"); + DEBUG(dbgs() << "subreg: eliminated!\n"); MBB->erase(MI); return true; } @@ -288,7 +288,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { DEBUG({ MachineBasicBlock::iterator dMI = MI; - errs() << "subreg: " << *(--dMI) << "\n"; + dbgs() << "subreg: " << *(--dMI) << "\n"; }); MBB->erase(MI); @@ -299,7 +299,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { /// copies. 
/// bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "Machine Function\n" + DEBUG(dbgs() << "Machine Function\n" << "********** LOWERING SUBREG INSTRS **********\n" << "********** Function: " << MF.getFunction()->getName() << '\n'); diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp index 73b15ed..337eab1 100644 --- a/lib/CodeGen/MachOWriter.cpp +++ b/lib/CodeGen/MachOWriter.cpp @@ -33,6 +33,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachOWriterInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/OutputBuffer.h" #include "llvm/Support/ErrorHandling.h" @@ -634,7 +635,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset, } case Instruction::Add: default: - errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n"; + dbgs() << "ConstantExpr not handled as global var init: " << *CE <<"\n"; llvm_unreachable(0); } } else if (PC->getType()->isSingleValueType()) { @@ -732,7 +733,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset, WorkList.push_back(CPair(CPS->getOperand(i), PA+SL->getElementOffset(i))); } else { - errs() << "Bad Type: " << *PC->getType() << "\n"; + dbgs() << "Bad Type: " << *PC->getType() << "\n"; llvm_unreachable("Unknown constant type to initialize memory with!"); } } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 74a0d57..e2ce642 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Assembly/Writer.h" @@ -158,7 +159,7 @@ bool MachineBasicBlock::isOnlyReachableByFallthrough() const { } void MachineBasicBlock::dump() const { - print(errs()); + print(dbgs()); } static inline void OutputReg(raw_ostream &os, unsigned RegNo, diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index dd6fd7e..ae9451c 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -299,7 +300,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, } void MachineFunction::dump() const { - print(errs()); + print(dbgs()); } void MachineFunction::print(raw_ostream &OS) const { @@ -519,7 +520,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ } void MachineFrameInfo::dump(const MachineFunction &MF) const { - print(MF, errs()); + print(MF, dbgs()); } //===----------------------------------------------------------------------===// @@ -579,7 +580,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << '\n'; } -void MachineJumpTableInfo::dump() const { print(errs()); } +void MachineJumpTableInfo::dump() const { print(dbgs()); } //===----------------------------------------------------------------------===// @@ -702,4 +703,4 @@ void MachineConstantPool::print(raw_ostream &OS) const { } } -void MachineConstantPool::dump() const { print(errs()); } +void 
MachineConstantPool::dump() const { print(dbgs()); } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a761c2d..cf3e3e1 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -28,11 +28,13 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Metadata.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -277,10 +279,15 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << '>'; break; case MachineOperand::MO_BlockAddress: - OS << "<"; + OS << '<'; WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); OS << '>'; break; + case MachineOperand::MO_Metadata: + OS << '<'; + WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); + OS << '>'; + break; default: llvm_unreachable("Unrecognized operand type"); } @@ -1094,7 +1101,7 @@ unsigned MachineInstr::isConstantValuePHI() const { } void MachineInstr::dump() const { - errs() << " " << *this; + dbgs() << " " << *this; } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { @@ -1313,3 +1320,12 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, true /*IsDead*/)); return true; } + +void MachineInstr::addRegisterDefined(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo) { + MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (!MO || MO->getSubReg()) + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/)); +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 0a57ea1..ffcc8ab 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -161,7 +161,7 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { /// loop. 
/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "******** Machine LICM ********\n"); + DEBUG(dbgs() << "******** Machine LICM ********\n"); Changed = FirstInLoop = false; MCP = MF.getConstantPool(); @@ -253,28 +253,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } DEBUG({ - errs() << "--- Checking if we can hoist " << I; + dbgs() << "--- Checking if we can hoist " << I; if (I.getDesc().getImplicitUses()) { - errs() << " * Instruction has implicit uses:\n"; + dbgs() << " * Instruction has implicit uses:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); *ImpUses; ++ImpUses) - errs() << " -> " << TRI->getName(*ImpUses) << "\n"; + dbgs() << " -> " << TRI->getName(*ImpUses) << "\n"; } if (I.getDesc().getImplicitDefs()) { - errs() << " * Instruction has implicit defines:\n"; + dbgs() << " * Instruction has implicit defines:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); *ImpDefs; ++ImpDefs) - errs() << " -> " << TRI->getName(*ImpDefs) << "\n"; + dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n"; } }); if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { - DEBUG(errs() << "Cannot hoist with implicit defines or uses\n"); + DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n"); return false; } @@ -479,7 +479,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { - DEBUG(errs() << "CSEing " << *MI << " with " << *Dup); + DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) @@ -506,14 +506,14 @@ void MachineLICM::Hoist(MachineInstr *MI) { // Now move the instructions to the predecessor, inserting it before any // terminator instructions. DEBUG({ - errs() << "Hoisting " << *MI; + dbgs() << "Hoisting " << *MI; if (CurPreheader->getBasicBlock()) - errs() << " to MachineBasicBlock " + dbgs() << " to MachineBasicBlock " << CurPreheader->getName(); if (MI->getParent()->getBasicBlock()) - errs() << " from MachineBasicBlock " + dbgs() << " from MachineBasicBlock " << MI->getParent()->getName(); - errs() << "\n"; + dbgs() << "\n"; }); // If this is the first instruction being hoisted to the preheader, diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index d561a5b..269538b 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" using namespace llvm; namespace llvm { @@ -73,3 +74,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { } return BotMBB; } + +void MachineLoop::dump() const { + print(dbgs()); +} diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 292096f..467ea5d 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -210,7 +210,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If the client wants to know about all new instructions, tell it. 
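
The MachineSSAUpdater hunks here and just below sit on the path that materializes PHIs on demand. A hedged sketch of driving that API as this file uses it; the function, blocks and registers are hypothetical, and the signatures are those of this era of the tree:

#include "llvm/CodeGen/MachineSSAUpdater.h"
using namespace llvm;

unsigned mergeValues(MachineFunction &MF, MachineBasicBlock *DefA,
                     unsigned RegA, MachineBasicBlock *DefB, unsigned RegB,
                     MachineBasicBlock *JoinMBB, unsigned OrigReg) {
  MachineSSAUpdater SSAUpdate(MF);
  SSAUpdate.Initialize(OrigReg);            // vreg being rewritten
  SSAUpdate.AddAvailableValue(DefA, RegA);  // value reaching from one path
  SSAUpdate.AddAvailableValue(DefB, RegB);  // value reaching from the other
  // Inserts a PHI in JoinMBB only when the incoming values actually differ:
  return SSAUpdate.GetValueInMiddleOfBlock(JoinMBB);
}
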
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI->getOperand(0).getReg(); } @@ -383,7 +383,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ InsertedPHI->eraseFromParent(); InsertedVal = ConstVal; } else { - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index e040738..c177e3c 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -90,7 +90,7 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "******** Machine Sinking ********\n"); + DEBUG(dbgs() << "******** Machine Sinking ********\n"); const TargetMachine &TM = MF.getTarget(); TII = TM.getInstrInfo(); @@ -255,15 +255,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (MI->getParent() == SuccToSinkTo) return false; - DEBUG(errs() << "Sink instr " << *MI); - DEBUG(errs() << "to block " << *SuccToSinkTo); + DEBUG(dbgs() << "Sink instr " << *MI); + DEBUG(dbgs() << "to block " << *SuccToSinkTo); // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(errs() << " *** PUNTING: Critical edge found\n"); + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); return false; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 0772319..584c21b 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -190,8 +190,7 @@ namespace { void report(const char *msg, const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); - void calcMaxRegsPassed(); - void calcMinRegsPassed(); + void calcRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); void calcRegsRequired(); @@ -710,7 +709,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { // Calculate the largest possible vregsPassed sets. These are the registers that // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. -void MachineVerifier::calcMaxRegsPassed() { +void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. DenseSet<const MachineBasicBlock*> todo; @@ -745,45 +744,9 @@ void MachineVerifier::calcMaxRegsPassed() { } } -// Calculate the minimum vregsPassed set. These are the registers that always -// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has -// been called earlier. 
-void MachineVerifier::calcMinRegsPassed() { - DenseSet<const MachineBasicBlock*> todo; - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) - todo.insert(MFI); - - while (!todo.empty()) { - const MachineBasicBlock *MBB = *todo.begin(); - todo.erase(MBB); - BBInfo &MInfo = MBBInfoMap[MBB]; - - // Remove entries from vRegsPassed that are not live out from all - // reachable predecessors. - RegSet dead; - for (RegSet::iterator I = MInfo.vregsPassed.begin(), - E = MInfo.vregsPassed.end(); I != E; ++I) { - for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), - PrE = MBB->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) { - dead.insert(*I); - break; - } - } - } - // If any regs removed, we need to recheck successors. - if (!dead.empty()) { - set_subtract(MInfo.vregsPassed, dead); - todo.insert(MBB->succ_begin(), MBB->succ_end()); - } - } -} - // Calculate the set of virtual registers that must be passed through each basic // block in order to satisfy the requirements of successor blocks. This is very -// similar to calcMaxRegsPassed, only backwards. +// similar to calcRegsPassed, only backwards. void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. DenseSet<const MachineBasicBlock*> todo; @@ -817,7 +780,7 @@ void MachineVerifier::calcRegsRequired() { } // Check PHI instructions at the beginning of MBB. It is assumed that -// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. +// calcRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { @@ -848,9 +811,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { } void MachineVerifier::visitMachineFunctionAfter() { - calcMaxRegsPassed(); + calcRegsPassed(); - // With the maximal set of vregsPassed we can verify dead-in registers. for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { BBInfo &MInfo = MBBInfoMap[MFI]; @@ -859,31 +821,16 @@ void MachineVerifier::visitMachineFunctionAfter() { if (!MInfo.reachable) continue; - for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), - PrE = MFI->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (!PrInfo.reachable) - continue; - - // Verify physical live-ins. EH landing pads have magic live-ins so we - // ignore them. - if (!MFI->isLandingPad()) { - for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(), - E = MFI->livein_end(); I != E; ++I) { - if (TargetRegisterInfo::isPhysicalRegister(*I) && - !isReserved (*I) && !PrInfo.isLiveOut(*I)) { - report("Live-in physical register is not live-out from predecessor", - MFI); - *OS << "Register " << TRI->getName(*I) - << " is not live-out from BB#" << (*PrI)->getNumber() - << ".\n"; - } - } - } + checkPHIOps(MFI); + // Verify dead-in virtual registers. + if (!allowVirtDoubleDefs) { + for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), + PrE = MFI->pred_end(); PrI != PrE; ++PrI) { + BBInfo &PrInfo = MBBInfoMap[*PrI]; + if (!PrInfo.reachable) + continue; - // Verify dead-in virtual registers. 
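
Taken together, the verifier hunks above and below drop calcMinRegsPassed and keep a single forward fixpoint, calcRegsPassed. Its core shape as a toy worklist over simplified types; the real pass also subtracts each block's kill set, which is omitted here:

#include <map>
#include <set>
#include <vector>

typedef std::set<unsigned> RegSet;
struct ToyBB { RegSet regsLiveOut, vregsPassed; std::vector<int> Succs; };

void calcRegsPassedToy(std::map<int, ToyBB> &Blocks) {
  std::set<int> Todo;
  for (std::map<int, ToyBB>::iterator I = Blocks.begin(), E = Blocks.end();
       I != E; ++I)
    Todo.insert(I->first);
  while (!Todo.empty()) {
    int N = *Todo.begin();
    Todo.erase(Todo.begin());
    ToyBB &BB = Blocks[N];
    RegSet Out = BB.regsLiveOut;                  // regs leaving this block,
    Out.insert(BB.vregsPassed.begin(),            // plus regs crossing it
               BB.vregsPassed.end());
    for (unsigned i = 0, e = BB.Succs.size(); i != e; ++i) {
      ToyBB &Succ = Blocks[BB.Succs[i]];
      unsigned Before = Succ.vregsPassed.size();
      Succ.vregsPassed.insert(Out.begin(), Out.end());
      if (Succ.vregsPassed.size() != Before)      // set grew: revisit successor
        Todo.insert(BB.Succs[i]);
    }
  }
}
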
- if (!allowVirtDoubleDefs) { for (RegMap::iterator I = MInfo.vregsDeadIn.begin(), E = MInfo.vregsDeadIn.end(); I != E; ++I) { // DeadIn register must be in neither regsLiveOut or vregsPassed of @@ -899,39 +846,6 @@ void MachineVerifier::visitMachineFunctionAfter() { } } - calcMinRegsPassed(); - - // With the minimal set of vregsPassed we can verify live-in virtual - // registers, including PHI instructions. - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; - - // Skip unreachable MBBs. - if (!MInfo.reachable) - continue; - - checkPHIOps(MFI); - - for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), - PrE = MFI->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (!PrInfo.reachable) - continue; - - for (RegMap::iterator I = MInfo.vregsLiveIn.begin(), - E = MInfo.vregsLiveIn.end(); I != E; ++I) { - if (!PrInfo.isLiveOut(I->first)) { - report("Used virtual register is not live-in", I->second); - *OS << "Register %reg" << I->first - << " is not live-out from predecessor MBB #" - << (*PrI)->getNumber() - << ".\n"; - } - } - } - } - // Now check LiveVariables info if available if (LiveVars) { calcRegsRequired(); diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp new file mode 100644 index 0000000..625ff89 --- /dev/null +++ b/lib/CodeGen/OptimizeExts.cpp @@ -0,0 +1,185 @@ +//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs optimization of sign / zero extension instructions. It +// may be extended to handle other instructions of similar property. +// +// On some targets, some instructions, e.g. X86 sign / zero extension, may +// leave the source value in the lower part of the result. This pass will +// replace (some) uses of the pre-extension value with uses of the sub-register +// of the results. 
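
Before the body of the new file continues, the property its header comment relies on can be checked in isolation: after a sign or zero extension, the low bits of the result equal the source value, which is what makes it legal to rewrite users of the narrow value to read a sub-register of the wide result. A standalone sanity check in plain C++, not LLVM code:

#include <cassert>
#include <stdint.h>

int main() {
  int16_t S = -1234;
  int32_t SExt = S;                          // e.g. MOVSX on x86
  assert(static_cast<int16_t>(SExt) == S);   // low 16 bits preserved
  uint16_t U = 0xBEEF;
  uint32_t ZExt = U;                         // e.g. MOVZX on x86
  assert(static_cast<uint16_t>(ZExt) == U);  // likewise for zero extension
  return 0;
}
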
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ext-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden, + cl::desc("Aggressive extension optimization")); + +STATISTIC(NumReuse, "Number of extension results reused"); + +namespace { + class OptimizeExts : public MachineFunctionPass { + const TargetMachine *TM; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; // Machine dominator tree + + public: + static char ID; // Pass identification + OptimizeExts() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + if (Aggressive) { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + } + + private: + bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &LocalMIs); + }; +} + +char OptimizeExts::ID = 0; +static RegisterPass<OptimizeExts> +X("opt-exts", "Optimize sign / zero extensions"); + +FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); } + +/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads +/// a single register and writes a single register and it does not modify +/// the source, and if the source value is preserved as a sub-register of +/// the result, then replace all reachable uses of the source with the subreg +/// of the result. +bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &LocalMIs) { + bool Changed = false; + LocalMIs.insert(MI); + + unsigned SrcReg, DstReg, SubIdx; + if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) { + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg); + if (++UI == MRI->use_end()) + // No other uses. + return false; + + // Ok, the source has other uses. See if we can replace the other uses + // with use of the result of the extension. + SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; + UI = MRI->use_begin(DstReg); + for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; + ++UI) + ReachedBBs.insert(UI->getParent()); + + bool ExtendLife = true; + // Uses that are in the same BB of uses of the result of the instruction. + SmallVector<MachineOperand*, 8> Uses; + // Uses that the result of the instruction can reach. + SmallVector<MachineOperand*, 8> ExtendedUses; + + UI = MRI->use_begin(SrcReg); + for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; + ++UI) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + if (UseMI == MI) + continue; + if (UseMI->getOpcode() == TargetInstrInfo::PHI) { + ExtendLife = false; + continue; + } + + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (UseMBB == MBB) { + // Local uses that come after the extension. 
+ if (!LocalMIs.count(UseMI)) + Uses.push_back(&UseMO); + } else if (ReachedBBs.count(UseMBB)) + // Non-local uses where the result of extension is used. Always + // replace these unless it's a PHI. + Uses.push_back(&UseMO); + else if (Aggressive && DT->dominates(MBB, UseMBB)) + // We may want to extend live range of the extension result in order + // to replace these uses. + ExtendedUses.push_back(&UseMO); + else { + // Both will be live out of the def MBB anyway. Don't extend live + // range of the extension result. + ExtendLife = false; + break; + } + } + + if (ExtendLife && !ExtendedUses.empty()) + // Ok, we'll extend the liveness of the extension result. + std::copy(ExtendedUses.begin(), ExtendedUses.end(), + std::back_inserter(Uses)); + + // Now replace all uses. + if (!Uses.empty()) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + for (unsigned i = 0, e = Uses.size(); i != e; ++i) { + MachineOperand *UseMO = Uses[i]; + MachineInstr *UseMI = UseMO->getParent(); + MachineBasicBlock *UseMBB = UseMI->getParent(); + unsigned NewVR = MRI->createVirtualRegister(RC); + BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR) + .addReg(DstReg).addImm(SubIdx); + UseMO->setReg(NewVR); + ++NumReuse; + Changed = true; + } + } + } + + return Changed; +} + +bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) { + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + MRI = &MF.getRegInfo(); + DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; + + bool Changed = false; + + SmallPtrSet<MachineInstr*, 8> LocalMIs; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + LocalMIs.clear(); + for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME; + ++MII) { + MachineInstr *MI = &*MII; + Changed |= OptimizeInstr(MI, MBB, LocalMIs); + } + } + + return Changed; +} diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h index a47dce9..738dea0 100644 --- a/lib/CodeGen/PBQP/AnnotatedGraph.h +++ b/lib/CodeGen/PBQP/AnnotatedGraph.h @@ -1,4 +1,4 @@ -//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===// +//===-- AnnotatedGraph.h - Annotated PBQP Graph -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h index b2f2e6f..35ec4f1 100644 --- a/lib/CodeGen/PBQP/ExhaustiveSolver.h +++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h @@ -1,4 +1,4 @@ -//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===// +//===-- ExhaustiveSolver.h - Brute Force PBQP Solver ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h index 0c7493b..becd98a 100644 --- a/lib/CodeGen/PBQP/GraphBase.h +++ b/lib/CodeGen/PBQP/GraphBase.h @@ -1,4 +1,4 @@ -//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===// +//===-- GraphBase.h - Abstract Base PBQP Graph ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h index 1670877..f78a58a 100644 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -1,4 +1,4 @@ -//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===// +//===-- HeuristicSolver.h - Heuristic PBQP Solver ---------------*- C++ -*-===// // // The LLVM 
Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h index 3ac9e70..1228f65 100644 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -1,4 +1,4 @@ -//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===// +//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h index 11f4b4b..20737a2 100644 --- a/lib/CodeGen/PBQP/PBQPMath.h +++ b/lib/CodeGen/PBQP/PBQPMath.h @@ -1,4 +1,4 @@ -//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===// +//===-- PBQPMath.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h index 1ca9cae..13e63ce 100644 --- a/lib/CodeGen/PBQP/SimpleGraph.h +++ b/lib/CodeGen/PBQP/SimpleGraph.h @@ -1,4 +1,4 @@ -//===-- SimpleGraph.h - Simple PBQP Graph ----------------------*- C++ --*-===// +//===-- SimpleGraph.h - Simple PBQP Graph -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h index c91e2fa..aee684d 100644 --- a/lib/CodeGen/PBQP/Solution.h +++ b/lib/CodeGen/PBQP/Solution.h @@ -1,4 +1,4 @@ -//===-- Solution.h ------- PBQP Solution -----------------------*- C++ --*-===// +//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h index a9c5f83..a445de8 100644 --- a/lib/CodeGen/PBQP/Solver.h +++ b/lib/CodeGen/PBQP/Solver.h @@ -1,4 +1,4 @@ -//===-- Solver.h ------- PBQP solver interface -----------------*- C++ --*-===// +//===-- Solver.h ------- PBQP solver interface ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 58c3dec..365df30 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -207,7 +207,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( IncomingReg = entry; reusedIncoming = true; ++NumReused; - DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); + DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); } else { entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } @@ -234,7 +234,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // AfterPHIsIt, so it appears before the current PHICopy. 
if (reusedIncoming) if (MachineInstr *OldKill = VI.findKill(&MBB)) { - DEBUG(errs() << "Remove old kill from " << *OldKill); + DEBUG(dbgs() << "Remove old kill from " << *OldKill); LV->removeVirtualRegisterKilled(IncomingReg, OldKill); DEBUG(MBB.dump()); } @@ -421,7 +421,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB); - DEBUG(errs() << "PHIElimination splitting critical edge:" + DEBUG(dbgs() << "PHIElimination splitting critical edge:" " BB#" << A->getNumber() << " -- BB#" << NMBB->getNumber() << " -- BB#" << B->getNumber() << '\n'); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 79be295..f43395f 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -233,7 +233,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TargetSubtarget::ANTIDEP_NONE; } - DEBUG(errs() << "PostRAScheduler\n"); + DEBUG(dbgs() << "PostRAScheduler\n"); const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); @@ -258,7 +258,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << + dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << ":BB#" << MBB->getNumber() << " ***\n"; } #endif @@ -342,7 +342,7 @@ void SchedulePostRATDList::Schedule() { } } - DEBUG(errs() << "********** List Scheduling **********\n"); + DEBUG(dbgs() << "********** List Scheduling **********\n"); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -448,7 +448,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, /// incorrect by instruction reordering. /// void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { - DEBUG(errs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); std::set<unsigned> killedRegs; BitVector ReservedRegs = TRI->getReservedRegs(MF); @@ -511,7 +511,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { } if (MO.isKill() != kill) { - DEBUG(errs() << "Fixing " << MO << " in "); + DEBUG(dbgs() << "Fixing " << MO << " in "); // Warning: ToggleKillFlag may invalidate MO. ToggleKillFlag(MI, MO); DEBUG(MI->dump()); @@ -549,9 +549,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - errs() << " has been released too many times!\n"; + dbgs() << " has been released too many times!\n"; llvm_unreachable(0); } #endif @@ -580,7 +580,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
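
Before the function itself, the release step the scheduling comment above describes can be shown on toy types; the real SUnit and SDep carry far more state:

#include <vector>

struct ToySU { unsigned NumPredsLeft; std::vector<ToySU*> Succs; };

void releaseSuccessors(ToySU *SU, std::vector<ToySU*> &Available) {
  for (unsigned i = 0, e = SU->Succs.size(); i != e; ++i) {
    ToySU *Succ = SU->Succs[i];
    if (--Succ->NumPredsLeft == 0)  // every predecessor has been scheduled
      Available.push_back(Succ);    // successor is now free to issue
  }
}
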
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -640,11 +640,11 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(errs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\n*** Examining Available\n"; LatencyPriorityQueue q = AvailableQueue; while (!q.empty()) { SUnit *su = q.pop(); - errs() << "Height " << su->getHeight() << ": "; + dbgs() << "Height " << su->getHeight() << ": "; su->dump(this); }); @@ -689,19 +689,19 @@ void SchedulePostRATDList::ListScheduleTopDown() { } } else { if (CycleHasInsts) { - DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n'); + DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); HazardRec->AdvanceCycle(); } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, // just advance the current cycle and try again. - DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n'); + DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); HazardRec->AdvanceCycle(); ++NumStalls; } else { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. - DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); HazardRec->EmitNoop(); Sequence.push_back(0); // NULL here means noop ++NumNoops; diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 1c5222c..8cbc8c2 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -481,32 +481,21 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, // Search for the use in this block that precedes the instruction we care // about, going to the fallback case if we don't find it. - if (UseI == MBB->begin()) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - MachineBasicBlock::iterator Walker = UseI; - --Walker; bool found = false; while (Walker != MBB->begin()) { + --Walker; if (BlockUses.count(Walker)) { found = true; break; } - --Walker; - } - - // Must check begin() too. - if (!found) { - if (BlockUses.count(Walker)) - found = true; - else - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); } + if (!found) + return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, + IsTopLevel, IsIntraBlock); + SlotIndex UseIndex = LIs->getInstructionIndex(Walker); UseIndex = UseIndex.getUseIndex(); SlotIndex EndIndex; @@ -533,17 +522,11 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, // This case is basically a merging of the two preceding case, with the // special note that checking for defs must take precedence over checking // for uses, because of two-address instructions. 
- - if (UseI == MBB->begin()) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - MachineBasicBlock::iterator Walker = UseI; - --Walker; bool foundDef = false; bool foundUse = false; while (Walker != MBB->begin()) { + --Walker; if (BlockDefs.count(Walker)) { foundDef = true; break; @@ -551,21 +534,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, foundUse = true; break; } - --Walker; - } - - // Must check begin() too. - if (!foundDef && !foundUse) { - if (BlockDefs.count(Walker)) - foundDef = true; - else if (BlockUses.count(Walker)) - foundUse = true; - else - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); } + if (!foundDef && !foundUse) + return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, + Uses, NewVNs, LiveOut, Phis, + IsTopLevel, IsIntraBlock); + SlotIndex StartIndex = LIs->getInstructionIndex(Walker); StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex(); SlotIndex EndIndex; @@ -1022,7 +997,7 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, /// so it would not cross the barrier that's being processed. Shrink wrap /// (minimize) the live interval to the last uses. bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { - DEBUG(errs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier + DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier << " result: "); CurrLI = LI; @@ -1039,7 +1014,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // If this would create a new join point, do not split. if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) { - DEBUG(errs() << "FAILED (would create a new join point).\n"); + DEBUG(dbgs() << "FAILED (would create a new join point).\n"); return false; } @@ -1056,13 +1031,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { MachineBasicBlock::iterator RestorePt = findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB); if (RestorePt == BarrierMBB->end()) { - DEBUG(errs() << "FAILED (could not find a suitable restore point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n"); return false; } if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) { - DEBUG(errs() << "success (remat).\n"); + DEBUG(dbgs() << "success (remat).\n"); return true; } @@ -1081,7 +1056,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { MachineBasicBlock::iterator SpillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); if (SpillPt == BarrierMBB->begin()) { - DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } // Add spill. @@ -1096,7 +1071,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // If it's already split, just restore the value. There is no need to spill // the def again. if (!DefMI) { - DEBUG(errs() << "FAILED (def is dead).\n"); + DEBUG(dbgs() << "FAILED (def is dead).\n"); return false; // Def is dead. Do nothing. 
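
Both PreAllocSplitting scans above were restructured the same way: instead of special-casing MBB->begin(), the loop now decrements its iterator at the top, so the first instruction in the block is tested like any other position. The idiom in isolation, over a generic container and a hypothetical predicate:

template <typename List, typename Pred>
typename List::iterator findLastBefore(List &L, typename List::iterator It,
                                       Pred Match) {
  while (It != L.begin()) {
    --It;            // decrement first, so begin() itself gets tested
    if (Match(*It))
      return It;     // last match strictly before the start position
  }
  return L.end();    // no match anywhere before It
}
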
} @@ -1111,13 +1086,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, RefsInMBB); if (SpillPt == DefMBB->begin()) { - DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } } else { SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); if (SpillPt == DefMBB->end()) { - DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); + DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } } @@ -1160,7 +1135,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } ++NumSplits; - DEBUG(errs() << "success.\n"); + DEBUG(dbgs() << "success.\n"); return true; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index c9a33d8..a00f450 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -61,7 +61,7 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, /// implicit_def defs and their uses. bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { - DEBUG(errs() << "********** PROCESS IMPLICIT DEFS **********\n" + DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" << "********** Function: " << ((Value*)fn.getFunction())->getName() << '\n'); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 9e97d89..8e44a57 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -277,7 +277,7 @@ namespace { bool Error = false; for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { if (regUse_[i] != 0) { - errs() << tri_->getName(i) << " is still in use!\n"; + dbgs() << tri_->getName(i) << " is still in use!\n"; Error = true; } } @@ -344,16 +344,16 @@ namespace { void printIntervals(const char* const str, ItTy i, ItTy e) const { DEBUG({ if (str) - errs() << str << " intervals:\n"; + dbgs() << str << " intervals:\n"; for (; i != e; ++i) { - errs() << "\t" << *i->first << " -> "; + dbgs() << "\t" << *i->first << " -> "; unsigned reg = i->first->reg; if (TargetRegisterInfo::isVirtualRegister(reg)) reg = vrm_->getPhys(reg); - errs() << tri_->getName(reg) << '\n'; + dbgs() << tri_->getName(reg) << '\n'; } }); } @@ -455,7 +455,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { return Reg; // Try to coalesce. 
- DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) + DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) << '\n'); vrm_->clearVirt(cur.reg); vrm_->assignVirt2Phys(cur.reg, CandReg); @@ -544,7 +544,7 @@ void RALinScan::initIntervalSets() void RALinScan::linearScan() { // linear scan algorithm DEBUG({ - errs() << "********** LINEAR SCAN **********\n" + dbgs() << "********** LINEAR SCAN **********\n" << "********** Function: " << mf_->getFunction()->getName() << '\n'; printIntervals("fixed", fixed_.begin(), fixed_.end()); @@ -555,7 +555,7 @@ void RALinScan::linearScan() { LiveInterval* cur = unhandled_.top(); unhandled_.pop(); ++NumIters; - DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n'); + DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); assert(!cur->empty() && "Empty interval in unhandled set."); @@ -580,7 +580,7 @@ void RALinScan::linearScan() { while (!active_.empty()) { IntervalPtr &IP = active_.back(); unsigned reg = IP.first->reg; - DEBUG(errs() << "\tinterval " << *IP.first << " expired\n"); + DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -592,7 +592,7 @@ void RALinScan::linearScan() { DEBUG({ for (IntervalPtrs::reverse_iterator i = inactive_.rbegin(); i != inactive_.rend(); ++i) - errs() << "\tinterval " << *i->first << " expired\n"; + dbgs() << "\tinterval " << *i->first << " expired\n"; }); inactive_.clear(); @@ -628,7 +628,7 @@ void RALinScan::linearScan() { } } - DEBUG(errs() << *vrm_); + DEBUG(dbgs() << *vrm_); // Look for physical registers that end up not being allocated even though // register allocator had to spill other registers in its register class. @@ -642,7 +642,7 @@ void RALinScan::linearScan() { /// to the inactive list. void RALinScan::processActiveIntervals(SlotIndex CurPoint) { - DEBUG(errs() << "\tprocessing active intervals:\n"); + DEBUG(dbgs() << "\tprocessing active intervals:\n"); for (unsigned i = 0, e = active_.size(); i != e; ++i) { LiveInterval *Interval = active_[i].first; @@ -652,7 +652,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // Remove expired intervals. - DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -665,7 +665,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint) } else if (IntervalPos->start > CurPoint) { // Move inactive intervals to inactive list. - DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -688,7 +688,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint) /// ones to the active list. 
void RALinScan::processInactiveIntervals(SlotIndex CurPoint) { - DEBUG(errs() << "\tprocessing inactive intervals:\n"); + DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { LiveInterval *Interval = inactive_[i].first; @@ -698,7 +698,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); // Pop off the end of the list. inactive_[i] = inactive_.back(); @@ -706,7 +706,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint) --i; --e; } else if (IntervalPos->start <= CurPoint) { // move re-activated intervals in active list - DEBUG(errs() << "\t\tinterval " << *Interval << " active\n"); + DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -834,10 +834,10 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, SmallVector<LiveInterval*, 8> SLIs[3]; DEBUG({ - errs() << "\tConsidering " << NumCands << " candidates: "; + dbgs() << "\tConsidering " << NumCands << " candidates: "; for (unsigned i = 0; i != NumCands; ++i) - errs() << tri_->getName(Candidates[i].first) << " "; - errs() << "\n"; + dbgs() << tri_->getName(Candidates[i].first) << " "; + dbgs() << "\n"; }); // Calculate the number of conflicts of each candidate. @@ -950,7 +950,7 @@ namespace { /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - DEBUG(errs() << "\tallocating current interval: "); + DEBUG(dbgs() << "\tallocating current interval: "); // This is an implicitly defined live interval, just assign any register. const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); @@ -958,7 +958,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) physReg = *RC->allocation_order_begin(*mf_); - DEBUG(errs() << tri_->getName(physReg) << '\n'); + DEBUG(dbgs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. vrm_->assignVirt2Phys(cur->reg, physReg); return; @@ -1092,7 +1092,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // the free physical register and add this interval to the active // list. if (physReg) { - DEBUG(errs() << tri_->getName(physReg) << '\n'); + DEBUG(dbgs() << tri_->getName(physReg) << '\n'); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); @@ -1108,7 +1108,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { } return; } - DEBUG(errs() << "no free registers\n"); + DEBUG(dbgs() << "no free registers\n"); // Compile the spill weights into an array that is better for scanning. std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f); @@ -1126,7 +1126,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n"); + DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. 
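The search that follows ("register(s) with min weight(s)") picks the eviction victim: each candidate physical register accumulates the spill weights of the intervals occupying it, and the cheapest one to respill wins. The selection is an argmin over those pairs, sketched here with invented types (assumes a non-empty candidate list):

    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Each pair is (physical register, accumulated spill weight).
    unsigned pickMinWeightReg(const std::vector<std::pair<unsigned, float> > &RW) {
      std::size_t Best = 0;
      for (std::size_t i = 1; i < RW.size(); ++i)
        if (RW[i].second < RW[Best].second)
          Best = i;                 // cheaper to respill than the current pick
      return RW[Best].first;
    }

    int main() {
      std::vector<std::pair<unsigned, float> > RW;
      RW.push_back(std::make_pair(1u, 3.5f));
      RW.push_back(std::make_pair(2u, 0.5f));
      RW.push_back(std::make_pair(3u, 2.0f));
      assert(pickMinWeightReg(RW) == 2);  // register 2 carries the least weight
    }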
float minWeight = HUGE_VALF; @@ -1196,10 +1196,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { } DEBUG({ - errs() << "\t\tregister(s) with min weight(s): "; + dbgs() << "\t\tregister(s) with min weight(s): "; for (unsigned i = 0; i != LastCandidate; ++i) - errs() << tri_->getName(RegsWeights[i].first) + dbgs() << tri_->getName(RegsWeights[i].first) << " (" << RegsWeights[i].second << ")\n"; }); @@ -1207,7 +1207,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // add any added intervals back to unhandled, and restart // linearscan. if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n'); + DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector<LiveInterval*, 8> spillIs; std::vector<LiveInterval*> added; @@ -1285,7 +1285,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { while (!spillIs.empty()) { LiveInterval *sli = spillIs.back(); spillIs.pop_back(); - DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n'); + DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); @@ -1296,7 +1296,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { spilled.insert(sli->reg); } - DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n'); + DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a // spilled live interval and undo each one, restoring the state of @@ -1306,7 +1306,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // If this interval starts before t we are done. if (!i->empty() && i->beginIndex() < earliestStart) break; - DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n'); + DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); // When undoing a live interval allocation we must know if it is active or @@ -1356,7 +1356,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { LiveInterval *HI = handled_[i]; if (!HI->expiredAt(earliestStart) && HI->expiredAt(cur->beginIndex())) { - DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n'); + DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); active_.push_back(std::make_pair(HI, HI->begin())); assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); addRegUse(vrm_->getPhys(HI->reg)); @@ -1492,7 +1492,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { // available first. unsigned Preference = vrm_->getRegAllocPref(cur->reg); if (Preference) { - DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") "); + DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); if (isRegAvail(Preference) && RC->contains(Preference)) return Preference; diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index aea5cff..cbb5826 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -296,11 +296,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, assert(VirtReg && "Spilling a physical register is illegal!" 
" Must not have appropriate kill for the register or use exists beyond" " the intended one."); - DEBUG(errs() << " Spilling register " << TRI->getName(PhysReg) + DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) << " containing %reg" << VirtReg); if (!isVirtRegModified(VirtReg)) { - DEBUG(errs() << " which has not been modified, so no store necessary!"); + DEBUG(dbgs() << " which has not been modified, so no store necessary!"); std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg); if (LastUse.first) LastUse.first->getOperand(LastUse.second).setIsKill(); @@ -310,7 +310,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // modified. const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(errs() << " to stack slot #" << FrameIndex); + DEBUG(dbgs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. @@ -321,7 +321,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); removePhysReg(PhysReg); } @@ -516,7 +516,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - DEBUG(errs() << " Reloading %reg" << VirtReg << " into " + DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " << TRI->getName(PhysReg) << "\n"); // Add move instruction(s) @@ -725,7 +725,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { DEBUG({ const BasicBlock *LBB = MBB.getBasicBlock(); if (LBB) - errs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); }); // Add live-in registers as active. @@ -752,13 +752,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MachineInstr *MI = MII++; const TargetInstrDesc &TID = MI->getDesc(); DEBUG({ - errs() << "\nStarting RegAlloc of: " << *MI; - errs() << " Regs have values: "; + dbgs() << "\nStarting RegAlloc of: " << *MI; + dbgs() << " Regs have values: "; for (unsigned i = 0; i != TRI->getNumRegs(); ++i) if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) - errs() << "[" << TRI->getName(i) + dbgs() << "[" << TRI->getName(i) << ",%reg" << PhysRegsUsed[i] << "] "; - errs() << '\n'; + dbgs() << '\n'; }); // Determine whether this is a copy instruction. 
The cases where the @@ -809,7 +809,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the earlyclobber register } else { @@ -876,13 +876,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DEBUG(errs() << " Last use of " << TRI->getName(PhysReg) + DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) << "[%reg" << VirtReg <<"], removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(errs() << " Last use of " + DEBUG(dbgs() << " Last use of " << TRI->getName(*SubRegs) << "[%reg" << VirtReg <<"], removing it from live set\n"); removePhysReg(*SubRegs); @@ -978,7 +978,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MF->getRegInfo().setPhysRegUsed(DestPhysReg); markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the output register } @@ -1001,14 +1001,14 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DEBUG(errs() << " Register " << TRI->getName(PhysReg) + DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) << " [%reg" << VirtReg << "] is never used, removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); *AliasSet; ++AliasSet) { if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(errs() << " Register " << TRI->getName(*AliasSet) + DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) << " [%reg" << *AliasSet << "] is never used, removing it from live set\n"); removePhysReg(*AliasSet); @@ -1058,7 +1058,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { /// runOnMachineFunction - Register allocate the whole function /// bool RALocal::runOnMachineFunction(MachineFunction &Fn) { - DEBUG(errs() << "Machine Function\n"); + DEBUG(dbgs() << "Machine Function\n"); MF = &Fn; TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index c2014a7..fc59653 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -717,7 +717,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { // Get the physical reg, subtracting 1 to account for the spill option. 
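In the PBQP formulation every virtual register gets an option vector in which entry 0 means "spill" and entry i, for i >= 1, means the (i-1)'th register of its allowed set; that is why the code just below indexes allowedSets with allocSelection - 1. A decoding sketch with made-up types:

    #include <cassert>
    #include <vector>

    struct Choice { bool Spilled; unsigned PhysReg; };

    Choice decodeSelection(unsigned AllocSelection,
                           const std::vector<unsigned> &AllowedSet) {
      Choice C = { true, 0 };
      if (AllocSelection == 0)
        return C;                                  // option 0 is "spill"
      C.Spilled = false;
      C.PhysReg = AllowedSet[AllocSelection - 1];  // shift past the spill slot
      return C;
    }

    int main() {
      std::vector<unsigned> Allowed;
      Allowed.push_back(7); Allowed.push_back(8); Allowed.push_back(9);
      assert(decodeSelection(0, Allowed).Spilled);
      assert(decodeSelection(2, Allowed).PhysReg == 8);
    }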
unsigned physReg = allowedSets[node][allocSelection - 1]; - DEBUG(errs() << "VREG " << virtReg << " -> " + DEBUG(dbgs() << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n"); assert(physReg != 0); @@ -741,7 +741,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { addStackInterval(spillInterval, mri); (void) oldSpillWeight; - DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: " + DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: " << oldSpillWeight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to @@ -752,12 +752,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(errs() << (*itr)->reg << " "); + DEBUG(dbgs() << (*itr)->reg << " "); vregIntervalsToAlloc.insert(*itr); } - DEBUG(errs() << ")\n"); + DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. anotherRoundNeeded |= !newSpills.empty(); @@ -849,7 +849,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { vrm = &getAnalysis<VirtRegMap>(); - DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: // @@ -874,7 +874,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { unsigned round = 0; while (!pbqpAllocComplete) { - DEBUG(errs() << " PBQP Regalloc round " << round << ":\n"); + DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); PBQP::SimpleGraph problem = constructPBQPProblem(); PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver; @@ -896,7 +896,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { node2LI.clear(); allowedSets.clear(); - DEBUG(errs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 71693d2..1f3e295 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -41,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const { if (SUnit *SU = Sequence[i]) SU->dump(this); else - errs() << "**** NOOP ****\n"; + dbgs() << "**** NOOP ****\n"; } } @@ -61,9 +61,9 @@ void ScheduleDAG::Run(MachineBasicBlock *bb, Schedule(); DEBUG({ - errs() << "*** Final schedule ***\n"; + dbgs() << "*** Final schedule ***\n"; dumpSchedule(); - errs() << '\n'; + dbgs() << '\n'; }); } @@ -271,58 +271,58 @@ void SUnit::ComputeHeight() { /// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or /// a group of nodes flagged together.
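The "# preds left" / "# succs left" counters that dumpAll prints below are what drive list scheduling: a unit becomes ready exactly when its counter reaches zero. A toy top-down scheduler over an acyclic dependence graph (Kahn-style, not LLVM's SUnit machinery):

    #include <cstddef>
    #include <queue>
    #include <vector>

    struct ToySUnit {
      std::vector<int> Succs;  // indices of dependent units
      int NumPredsLeft;        // unscheduled predecessors remaining
    };

    // Emits units in dependence order; assumes the graph is acyclic, as a
    // scheduling DAG is.
    std::vector<int> topDownSchedule(std::vector<ToySUnit> &SUnits) {
      std::queue<int> Ready;
      std::vector<int> Sequence;
      for (int i = 0; i < (int)SUnits.size(); ++i)
        if (SUnits[i].NumPredsLeft == 0)
          Ready.push(i);                        // roots are ready immediately
      while (!Ready.empty()) {
        int SU = Ready.front(); Ready.pop();
        Sequence.push_back(SU);
        for (std::size_t s = 0; s < SUnits[SU].Succs.size(); ++s)
          if (--SUnits[SUnits[SU].Succs[s]].NumPredsLeft == 0)
            Ready.push(SUnits[SU].Succs[s]);    // last predecessor retired
      }
      return Sequence;
    }

    int main() {
      std::vector<ToySUnit> SUnits(3);          // counters start at zero
      SUnits[0].Succs.push_back(2);
      SUnits[1].Succs.push_back(2);
      SUnits[2].NumPredsLeft = 2;               // unit 2 waits on both
      return topDownSchedule(SUnits).back() == 2 ? 0 : 1;  // 2 goes last
    }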
void SUnit::dump(const ScheduleDAG *G) const { - errs() << "SU(" << NodeNum << "): "; + dbgs() << "SU(" << NodeNum << "): "; G->dumpNode(this); } void SUnit::dumpAll(const ScheduleDAG *G) const { dump(G); - errs() << " # preds left : " << NumPredsLeft << "\n"; - errs() << " # succs left : " << NumSuccsLeft << "\n"; - errs() << " Latency : " << Latency << "\n"; - errs() << " Depth : " << Depth << "\n"; - errs() << " Height : " << Height << "\n"; + dbgs() << " # preds left : " << NumPredsLeft << "\n"; + dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + dbgs() << " Latency : " << Latency << "\n"; + dbgs() << " Depth : " << Depth << "\n"; + dbgs() << " Height : " << Height << "\n"; if (Preds.size() != 0) { - errs() << " Predecessors:\n"; + dbgs() << " Predecessors:\n"; for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { - errs() << " "; + dbgs() << " "; switch (I->getKind()) { - case SDep::Data: errs() << "val "; break; - case SDep::Anti: errs() << "anti"; break; - case SDep::Output: errs() << "out "; break; - case SDep::Order: errs() << "ch "; break; + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; } - errs() << "#"; - errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - errs() << " *"; - errs() << ": Latency=" << I->getLatency(); - errs() << "\n"; + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; } } if (Succs.size() != 0) { - errs() << " Successors:\n"; + dbgs() << " Successors:\n"; for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); I != E; ++I) { - errs() << " "; + dbgs() << " "; switch (I->getKind()) { - case SDep::Data: errs() << "val "; break; - case SDep::Anti: errs() << "anti"; break; - case SDep::Output: errs() << "out "; break; - case SDep::Order: errs() << "ch "; break; + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; } - errs() << "#"; - errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - errs() << " *"; - errs() << ": Latency=" << I->getLatency(); - errs() << "\n"; + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; } } - errs() << "\n"; + dbgs() << "\n"; } #ifndef NDEBUG @@ -340,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { continue; } if (!AnyNotSched) - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - errs() << "has not been scheduled!\n"; + dbgs() << "has not been scheduled!\n"; AnyNotSched = true; } if (SUnits[i].isScheduled && (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) > unsigned(INT_MAX)) { if (!AnyNotSched) - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - errs() << "has an unexpected " + dbgs() << "has an unexpected " << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; } if (isBottomUp) { if (SUnits[i].NumSuccsLeft != 0) { if (!AnyNotSched) - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! 
***\n"; SUnits[i].dump(this); - errs() << "has successors left!\n"; + dbgs() << "has successors left!\n"; AnyNotSched = true; } } else { if (SUnits[i].NumPredsLeft != 0) { if (!AnyNotSched) - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - errs() << "has predecessors left!\n"; + dbgs() << "has predecessors left!\n"; AnyNotSched = true; } } diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp index 38839c4..4e6c1fc 100644 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -69,7 +70,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG - errs() << "Formal argument #" << i << " has unhandled type " + dbgs() << "Formal argument #" << i << " has unhandled type " << ArgVT.getEVTString(); #endif llvm_unreachable(0); @@ -102,7 +103,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG - errs() << "Return operand #" << i << " has unhandled type " + dbgs() << "Return operand #" << i << " has unhandled type " << VT.getEVTString(); #endif llvm_unreachable(0); @@ -121,7 +122,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG - errs() << "Call operand #" << i << " has unhandled type " + dbgs() << "Call operand #" << i << " has unhandled type " << ArgVT.getEVTString(); #endif llvm_unreachable(0); @@ -140,7 +141,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, ISD::ArgFlagsTy ArgFlags = Flags[i]; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG - errs() << "Call operand #" << i << " has unhandled type " + dbgs() << "Call operand #" << i << " has unhandled type " << ArgVT.getEVTString(); #endif llvm_unreachable(0); @@ -157,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG - errs() << "Call result #" << i << " has unhandled type " + dbgs() << "Call result #" << i << " has unhandled type " << VT.getEVTString(); #endif llvm_unreachable(0); @@ -170,7 +171,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { #ifndef NDEBUG - errs() << "Call result has unhandled type " + dbgs() << "Call result has unhandled type " << VT.getEVTString(); #endif llvm_unreachable(0); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e6aa14c..549527c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -541,11 +541,11 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) 
{ assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DEBUG(errs() << "\nReplacing.1 "; + DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); - errs() << "\nWith: "; + dbgs() << "\nWith: "; To[0].getNode()->dump(&DAG); - errs() << " and " << NumTo-1 << " other values\n"; + dbgs() << " and " << NumTo-1 << " other values\n"; for (unsigned i = 0, e = NumTo; i != e; ++i) assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && @@ -619,11 +619,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DEBUG(errs() << "\nReplacing.2 "; + DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); - errs() << "\nWith: "; + dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); - errs() << '\n'); + dbgs() << '\n'); CommitTargetLoweringOpt(TLO); return true; @@ -689,11 +689,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(errs() << "\nReplacing.3 "; + DEBUG(dbgs() << "\nReplacing.3 "; N->dump(&DAG); - errs() << "\nWith: "; + dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); - errs() << '\n'); + dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -1684,22 +1684,25 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { EVT VT = N0.getValueType(); assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + // Bail early if none of these transforms apply. + if (N0.getNode()->getNumOperands() == 0) return SDValue(); + // For each of OP in AND/OR/XOR: // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. 
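The fold table above, (OP (zext x), (zext y)) -> (zext (OP x, y)) and friends, is sound because bitwise ops commute with extension, so hoisting the op below a single extend removes a node. An exhaustive 8-bit spot check in plain C++ (the sign-extend case works the same way):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y) {
          uint8_t a = (uint8_t)x, b = (uint8_t)y;
          // (OP (zext a), (zext b)) == (zext (OP a, b)) for OP in and/or/xor
          assert(((uint32_t)a & (uint32_t)b) == (uint32_t)(uint8_t)(a & b));
          assert(((uint32_t)a | (uint32_t)b) == (uint32_t)(uint8_t)(a | b));
          assert(((uint32_t)a ^ (uint32_t)b) == (uint32_t)(uint8_t)(a ^ b));
        }
      return 0;
    }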
- if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + EVT Op0VT = N0.getOperand(0).getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || - (N0.getOpcode() == ISD::TRUNCATE && - !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && - N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && - (!LegalOperations || - TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { + Op0VT == N1.getOperand(0).getValueType() && + (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); @@ -1839,6 +1842,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -1885,48 +1889,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) - if (N1C && N0.getOpcode() == ISD::LOAD) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); + // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) + if (N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { + bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; + LoadSDNode *LN0 = HasAnyExt + ? cast<LoadSDNode>(N0.getOperand(0)) + : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && - // Do not change the width of a volatile load. - !LN0->isVolatile()) { - EVT ExtVT = MVT::Other; + LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) - ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - - EVT LoadedVT = LN0->getMemoryVT(); - - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { - EVT PtrType = N0.getOperand(1).getValueType(); - - // For big endian targets, we need to add an offset to the pointer to - // load the correct bytes. For little endian systems, we merely need to - // read fewer bytes from the same pointer. - unsigned LVTStoreBytes = LoadedVT.getStoreSize(); - unsigned EVTStoreBytes = ExtVT.getStoreSize(); - unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - unsigned Alignment = LN0->getAlignment(); - SDValue NewPtr = LN0->getBasePtr(); - - if (TLI.isBigEndian()) { - NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); - Alignment = MinAlign(Alignment, PtrOff); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT LoadedVT = LN0->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; + + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(LN0, NewLoad, NewLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } + + // Do not change the width of a volatile load. + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = LN0->getOperand(1).getValueType(); + + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + // For big endian targets, we need to add an offset to the pointer + // to load the correct bytes. For little endian systems, we merely + // need to read fewer bytes from the same pointer. + if (TLI.isBigEndian()) { + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } - AddToWorkList(NewPtr.getNode()); - SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), - NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), Alignment); - AddToWorkList(N); - CombineTo(N0.getNode(), Load, Load.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + AddToWorkList(NewPtr.getNode()); + + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), Alignment); + AddToWorkList(N); + CombineTo(LN0, Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } } } } @@ -2555,10 +2580,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); - EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); - if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); + if (VT.isVector()) + ExtVT = EVT::getVectorVT(*DAG.getContext(), + ExtVT, VT.getVectorNumElements()); + if ((!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, - N0.getOperand(0), DAG.getValueType(EVT)); + N0.getOperand(0), DAG.getValueType(ExtVT)); } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) @@ -2778,9 +2807,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // However, after the source operand of SRL is optimized into AND, the SRL // itself may not be optimized further. Look for it and add the BRCOND into // the worklist. - if (N->hasOneUse() && - N->use_begin()->getOpcode() == ISD::BRCOND) - AddToWorkList(*N->use_begin()); + if (N->hasOneUse()) { + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { + // Also look past the truncate.
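That PtrOff computation is the entire endianness story for narrowed loads: the low-order bytes wanted by, say, an i8 zextload sit at offset 0 on a little-endian target but at the far end of the wide value on a big-endian one, hence the adjustment by LVTStoreBytes - EVTStoreBytes. As bare arithmetic:

    #include <cassert>

    // Byte offset of the low-order ExtBytes inside a LoadedBytes-wide value.
    unsigned narrowedLoadOffset(unsigned LoadedBytes, unsigned ExtBytes,
                                bool BigEndian) {
      return BigEndian ? LoadedBytes - ExtBytes : 0;
    }

    int main() {
      assert(narrowedLoadOffset(4, 1, false) == 0);  // LE: reread in place
      assert(narrowedLoadOffset(4, 1, true) == 3);   // BE: step to the end
    }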
+ Use = *Use->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + } + } return SDValue(); } @@ -3034,9 +3071,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. SDValue Op = N0.getOperand(0); - unsigned OpBits = Op.getValueType().getSizeInBits(); - unsigned MidBits = N0.getValueType().getSizeInBits(); - unsigned DestBits = VT.getSizeInBits(); + unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); + unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); + unsigned DestBits = VT.getScalarType().getSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { @@ -3059,12 +3096,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (truncate x)) -> (sextinreg x). if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, N0.getValueType())) { - if (Op.getValueType().bitsLT(VT)) + if (OpBits < DestBits) Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); - else if (Op.getValueType().bitsGT(VT)) + else if (OpBits > DestBits) Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, - DAG.getValueType(N0.getValueType().getScalarType())); + DAG.getValueType(N0.getValueType())); } } @@ -3198,7 +3235,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && + (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3322,7 +3362,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DebugLoc dl = N->getDebugLoc(); return DAG.getNode(N0.getOpcode(), dl, VT, DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), - DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); + DAG.getNode(ISD::ZERO_EXTEND, dl, + N0.getOperand(1).getValueType(), + N0.getOperand(1))); } return SDValue(); @@ -3512,7 +3554,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (VT.isVector()) return SDValue(); - // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then + // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; @@ -3586,7 +3628,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { EVT VT = N->getValueType(0); EVT EVT = cast<VTSDNode>(N1)->getVT(); unsigned VTBits = VT.getScalarType().getSizeInBits(); - unsigned EVTBits = EVT.getSizeInBits(); + unsigned EVTBits = EVT.getScalarType().getSizeInBits(); // fold (sext_in_reg c1) -> c1 if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) @@ -3702,7 +3744,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); else // if the source and dest are the same type, we can drop both the extend - // and the truncate + // and the truncate. return N0.getOperand(0); } @@ -4513,6 +4555,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } + SDNode *Trunc = 0; + if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { + // Look past the truncate.
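The trunc/sext elimination in visitSIGN_EXTEND above leans on ComputeNumSignBits: if a value already carries more sign-bit copies than the truncation removes, truncating and sign-extending back is the identity. A small model for one concrete 32-bit value (assumes arithmetic right shift of signed values, as on mainstream targets):

    #include <cassert>
    #include <cstdint>

    // Counts how many copies of the sign bit lead the value, sign bit included.
    unsigned numSignBits(int32_t V) {
      unsigned N = 1;
      while (N < 32 && ((V >> (31 - N)) & 1) == ((V >> 31) & 1))
        ++N;  // the next bit still replicates the sign
      return N;
    }

    int main() {
      int32_t V = -5;            // 0xFFFFFFFB: 29 leading sign bits
      assert(numSignBits(V) == 29);
      // More than 32 - 8 sign bits, so trunc-to-i8-then-sext is the identity:
      assert((int32_t)(int8_t)V == V);
    }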
+ Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } + if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { // Match this pattern so that we can generate simpler code: // @@ -4524,7 +4573,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // into // // %a = ... - // %b = and %a, 2 + // %b = and i32 %a, 2 // %c = setcc eq %b, 0 // brcond %c ... // @@ -4535,7 +4584,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Op1 = N1.getOperand(1); if (Op0.getOpcode() == ISD::AND && - Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant) { SDValue AndOp1 = Op0.getOperand(1); @@ -4550,12 +4598,21 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETNE); + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + // Don't add the new BRCond into the worklist or else SimplifySelectCC + // will convert it back to (X & C1) >> C2. + CombineTo(N, NewBRCond, false); + // Truncate is dead. + if (Trunc) { + removeFromWorkList(Trunc); + DAG.DeleteNode(Trunc); + } // Replace the uses of SRL with SETCC DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), - MVT::Other, Chain, SetCC, N2); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } @@ -4692,11 +4749,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DEBUG(errs() << "\nReplacing.4 "; + DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); - errs() << "\nWith: "; + dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); - errs() << '\n'); + dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4826,11 +4883,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DEBUG(errs() << "\nReplacing.5 "; + DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); - errs() << "\nWith: "; + dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); - errs() << '\n'); + dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4889,11 +4946,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. 
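The brcond rewrite above rests on a simple identity: when C1 is the single bit 1 << C2, branching on ((x & C1) >> C2) is the same as branching on (x & C1) != 0, which is the setcc form the combiner prefers. An exhaustive check over a small range:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 1024; ++x)
        for (unsigned c2 = 0; c2 < 10; ++c2) {
          uint32_t c1 = 1u << c2;                 // single-bit mask
          bool viaShift = ((x & c1) >> c2) != 0;  // original: srl of the and
          bool viaSetcc = (x & c1) != 0;          // rewritten: plain compare
          assert(viaShift == viaSetcc);
        }
      return 0;
    }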
- DEBUG(errs() << "\nReplacing.6 "; + DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); - errs() << "\nWith chain: "; + dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); - errs() << "\n"); + dbgs() << "\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); @@ -4909,11 +4966,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DEBUG(errs() << "\nReplacing.6 "; + DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); - errs() << "\nWith: "; + dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); - errs() << " and 2 other values\n"); + dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), @@ -5738,35 +5795,48 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->getMemoryVT() == RLD->getMemoryVT()) { // FIXME: this discards src value information. This is // over-conservative. It would be beneficial to be able to remember - // both potential memory locations. + // both potential memory locations. Since we are discarding + // src value info, don't do the transformation if the memory + // locations are not in the default address space. + unsigned LLDAddrSpace = 0, RLDAddrSpace = 0; + if (const Value *LLDVal = LLD->getMemOperand()->getValue()) { + if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType())) + LLDAddrSpace = PT->getAddressSpace(); + } + if (const Value *RLDVal = RLD->getMemOperand()->getValue()) { + if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType())) + RLDAddrSpace = PT->getAddressSpace(); + } SDValue Addr; - if (TheSelect->getOpcode() == ISD::SELECT) { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if ((!LLD->hasAnyUseOfValue(1) || - !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && - (!RLD->hasAnyUseOfValue(1) || - !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); - } - } else { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if ((!LLD->hasAnyUseOfValue(1) || - (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && - (!RLD->hasAnyUseOfValue(1) || - (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), - TheSelect->getOperand(1), - LLD->getBasePtr(), RLD->getBasePtr(), - TheSelect->getOperand(4)); + if (LLDAddrSpace == 0 && RLDAddrSpace == 0) { + if (TheSelect->getOpcode() == ISD::SELECT) { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. 
+ if ((!LLD->hasAnyUseOfValue(1) || + !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && + (!RLD->hasAnyUseOfValue(1) || + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } + } else { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if ((!LLD->hasAnyUseOfValue(1) || + (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && + (!RLD->hasAnyUseOfValue(1) || + (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 33694f2..09fd657 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -78,7 +78,7 @@ unsigned FastISel::getRegForValue(Value *V) { // Look up the value to see if we already have a register for it. We // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA - // def-dominatess-use requirement enforced. + // def-dominates-use requirement enforced. if (ValueMap.count(V)) return ValueMap[V]; unsigned Reg = LocalValueMap[V]; @@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// -bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { +bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -325,12 +325,6 @@ bool FastISel::SelectCall(User *I) { unsigned IID = F->getIntrinsicID(); switch (IID) { default: break; - case Intrinsic::dbg_stoppoint: - case Intrinsic::dbg_region_start: - case Intrinsic::dbg_region_end: - case Intrinsic::dbg_func_start: - // FIXME - Remove this instructions once the dust settles. - return true; case Intrinsic::dbg_declare: { DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None)||!DW @@ -338,8 +332,6 @@ bool FastISel::SelectCall(User *I) { return true; Value *Address = DI->getAddress(); - if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) - Address = BCI->getOperand(0); AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. 
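The SelectBinaryOp signature change above (and the FastEmit_* changes that follow) widens ISD::NodeType to plain unsigned; presumably this is so the same hooks can also accept opcode values beyond the end of the generic enumeration, which an enum-typed parameter cannot express cleanly. A sketch of that pattern, with invented opcode names:

    #include <cstdio>

    enum GenericOp { ADD, SUB, BUILTIN_OP_END };

    // Hypothetical target-specific opcode: continues where the enum stops.
    const unsigned MY_TARGET_OP = BUILTIN_OP_END + 7;

    void fastEmit(unsigned Opcode) {  // unsigned, not GenericOp
      if (Opcode >= BUILTIN_OP_END)
        std::printf("target-specific opcode %u\n", Opcode);
      else
        std::printf("generic opcode %u\n", Opcode);
    }

    int main() {
      fastEmit(ADD);           // a generic opcode still works
      fastEmit(MY_TARGET_OP);  // out-of-enum values now typecheck too
    }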
if (!AI) break; @@ -424,7 +416,7 @@ bool FastISel::SelectCall(User *I) { return false; } -bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { +bool FastISel::SelectCast(User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); @@ -742,44 +734,44 @@ FastISel::FastISel(MachineFunction &mf, FastISel::~FastISel() {} unsigned FastISel::FastEmit_(MVT, MVT, - ISD::NodeType) { + unsigned) { return 0; } unsigned FastISel::FastEmit_r(MVT, MVT, - ISD::NodeType, unsigned /*Op0*/) { + unsigned, unsigned /*Op0*/) { return 0; } unsigned FastISel::FastEmit_rr(MVT, MVT, - ISD::NodeType, unsigned /*Op0*/, + unsigned, unsigned /*Op0*/, unsigned /*Op0*/) { return 0; } -unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) { +unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { return 0; } unsigned FastISel::FastEmit_f(MVT, MVT, - ISD::NodeType, ConstantFP * /*FPImm*/) { + unsigned, ConstantFP * /*FPImm*/) { return 0; } unsigned FastISel::FastEmit_ri(MVT, MVT, - ISD::NodeType, unsigned /*Op0*/, + unsigned, unsigned /*Op0*/, uint64_t /*Imm*/) { return 0; } unsigned FastISel::FastEmit_rf(MVT, MVT, - ISD::NodeType, unsigned /*Op0*/, + unsigned, unsigned /*Op0*/, ConstantFP * /*FPImm*/) { return 0; } unsigned FastISel::FastEmit_rri(MVT, MVT, - ISD::NodeType, + unsigned, unsigned /*Op0*/, unsigned /*Op1*/, uint64_t /*Imm*/) { return 0; @@ -789,7 +781,7 @@ unsigned FastISel::FastEmit_rri(MVT, MVT, /// to emit an instruction with an immediate operand using FastEmit_ri. /// If that fails, it materializes the immediate into a register and tries /// FastEmit_rr instead. -unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, uint64_t Imm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the ri form. @@ -806,7 +798,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode, /// to emit an instruction with a floating-point immediate operand using /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and tries FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, unsigned Op0, ConstantFP *FPImm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index e3b25c2..4868c9e 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -113,7 +113,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, return; } // Interpret void as zero return values. - if (Ty == Type::getVoidTy(Ty->getContext())) + if (Ty->isVoidTy()) return; // Base case: we can get an EVT for this LLVM IR type.
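The FastEmit_ri_ comment above describes a common two-step emission strategy: try the register+immediate form first, and if the target rejects the immediate, materialize it into a register and retry register+register. Schematically, with invented stand-ins for the target hooks:

    #include <cstdint>
    #include <cstdio>

    // Invented stand-ins; a real target would emit machine instructions.
    // tryEmitRI returns 0 when the immediate does not fit its encoding.
    static unsigned NextReg = 1;
    unsigned tryEmitRI(unsigned, uint64_t Imm) {
      return Imm <= 0xFFF ? NextReg++ : 0;  // pretend: 12-bit immediates only
    }
    unsigned materializeImm(uint64_t) { return NextReg++; }
    unsigned tryEmitRR(unsigned, unsigned) { return NextReg++; }

    unsigned emitWithImm(unsigned Op0, uint64_t Imm) {
      if (unsigned R = tryEmitRI(Op0, Imm))
        return R;                             // the ri form was legal
      unsigned ImmReg = materializeImm(Imm);  // constant goes into a register
      return tryEmitRR(Op0, ImmReg);          // fall back to the rr form
    }

    int main() {
      std::printf("small imm -> vreg %u\n", emitWithImm(1, 42));
      std::printf("large imm -> vreg %u\n", emitWithImm(1, 1u << 20));
    }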
ValueVTs.push_back(TLI.getValueType(Ty)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 474d833..5e3f58a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -32,6 +32,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -950,9 +951,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { switch (Node->getOpcode()) { default: #ifndef NDEBUG - errs() << "NODE: "; - Node->dump(&DAG); - errs() << "\n"; + dbgs() << "NODE: "; + Node->dump( &DAG); + dbgs() << "\n"; #endif llvm_unreachable("Do not know how to legalize this operator!"); @@ -2292,12 +2293,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); EVT ShiftAmountTy = TLI.getShiftAmountTy(); - if (VT.isVector()) { + if (VT.isVector()) ShiftAmountTy = VT; - VT = VT.getVectorElementType(); - } - unsigned BitsDiff = VT.getSizeInBits() - - ExtraVT.getSizeInBits(); + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 2831617..4f0fce7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,15 +43,15 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, //===----------------------------------------------------------------------===// void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); - errs() << "\n"); + DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "SoftenFloatResult #" << ResNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to soften the result of this operator!"); @@ -531,15 +531,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); - errs() << "\n"); + DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "SoftenFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to soften this operator's operand!"); @@ -768,7 +768,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. 
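"Soften float result", above, is the strategy for targets without the needed FP support: keep an f32 or f64 in a same-sized integer and lower arithmetic on it to library calls. The lossless reinterpret (not convert) round trip underneath that scheme, in portable C++:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // memcpy is the portable bit-cast: same bits, different type.
    uint32_t softenFloat(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof Bits);
      return Bits;
    }

    float restoreFloat(uint32_t Bits) {
      float F;
      std::memcpy(&F, &Bits, sizeof F);
      return F;
    }

    int main() {
      assert(restoreFloat(softenFloat(1.5f)) == 1.5f);  // lossless round trip
    }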
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n"); + DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -779,8 +779,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "ExpandFloatResult #" << ResNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to expand the result of this operator!"); @@ -1167,7 +1167,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, /// types of the node are known to be legal, but other operands of the node may /// need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n"); + DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) @@ -1178,8 +1178,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "ExpandFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to expand this operator's operand!"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index bd3b97a..9932cf4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -33,7 +33,7 @@ using namespace llvm; /// may also have invalid operands or may have other results that need /// expansion, we just know that (at least) one result needs promotion. void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n"); + DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. @@ -43,8 +43,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "PromoteIntegerResult #" << ResNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to promote this operator!"); case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; @@ -599,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. 
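"Promote integer result" widens a type the target cannot hold (say i8) to one it can (say i32); the promoted value is only trusted up to the original width, which is safe for wrap-around arithmetic. Demonstrating that i8 addition survives promotion:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y) {
          uint8_t Narrow = (uint8_t)((uint8_t)x + (uint8_t)y); // native i8 add
          uint32_t Wide = (uint32_t)x + (uint32_t)y;           // promoted add
          assert(Narrow == (uint8_t)Wide);  // truncation recovers the result
        }
      return 0;
    }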
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n"); + DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -608,8 +608,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "PromoteIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to promote this operator's operand!"); @@ -910,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n"); + DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -921,8 +921,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "ExpandIntegerResult #" << ResNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to expand the result of this operator!"); @@ -1965,7 +1965,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n"); + DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -1974,8 +1974,8 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "ExpandIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); errs() << "\n"; + dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; #endif llvm_unreachable("Do not know how to expand this operator's operand!"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index d9efd4f..37f36a3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -123,42 +123,42 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // another node that has not been seen by the LegalizeTypes machinery. 
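PerformExpensiveChecks, below, encodes a value's membership in each legalization map as one bit of Mapped, so "value lives in more than one map" is the classic more-than-one-bit-set test Mapped & (Mapped - 1):

    #include <cassert>

    bool inMultipleMaps(unsigned Mapped) {
      // Clearing the lowest set bit leaves a residue only if another bit is set.
      return (Mapped & (Mapped - 1)) != 0;
    }

    int main() {
      assert(!inMultipleMaps(0x4));        // exactly one map: consistent
      assert(inMultipleMaps(0x4 | 0x10));  // two maps at once: a bug
    }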
if ((I->getNodeId() == NewNode && Mapped > 1) || (I->getNodeId() != NewNode && Mapped != 0)) { - errs() << "Unprocessed value in a map!"; + dbgs() << "Unprocessed value in a map!"; Failed = true; } } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { if (Mapped > 1) { - errs() << "Value with legal type was transformed!"; + dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { if (Mapped == 0) { - errs() << "Processed value not in any map!"; + dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { - errs() << "Value in multiple maps!"; + dbgs() << "Value in multiple maps!"; Failed = true; } } if (Failed) { if (Mapped & 1) - errs() << " ReplacedValues"; + dbgs() << " ReplacedValues"; if (Mapped & 2) - errs() << " PromotedIntegers"; + dbgs() << " PromotedIntegers"; if (Mapped & 4) - errs() << " SoftenedFloats"; + dbgs() << " SoftenedFloats"; if (Mapped & 8) - errs() << " ScalarizedVectors"; + dbgs() << " ScalarizedVectors"; if (Mapped & 16) - errs() << " ExpandedIntegers"; + dbgs() << " ExpandedIntegers"; if (Mapped & 32) - errs() << " ExpandedFloats"; + dbgs() << " ExpandedFloats"; if (Mapped & 64) - errs() << " SplitVectors"; + dbgs() << " SplitVectors"; if (Mapped & 128) - errs() << " WidenedVectors"; - errs() << "\n"; + dbgs() << " WidenedVectors"; + dbgs() << "\n"; llvm_unreachable(0); } } @@ -342,7 +342,7 @@ ScanOperands: } if (i == NumOperands) { - DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n"); + DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); } } NodeDone: @@ -411,7 +411,7 @@ NodeDone: if (!IgnoreNodeResults(I)) for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) if (!isTypeLegal(I->getValueType(i))) { - errs() << "Result type " << i << " illegal!\n"; + dbgs() << "Result type " << i << " illegal!\n"; Failed = true; } @@ -419,24 +419,24 @@ NodeDone: for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(I->getOperand(i).getNode()) && !isTypeLegal(I->getOperand(i).getValueType())) { - errs() << "Operand type " << i << " illegal!\n"; + dbgs() << "Operand type " << i << " illegal!\n"; Failed = true; } if (I->getNodeId() != Processed) { if (I->getNodeId() == NewNode) - errs() << "New node not analyzed?\n"; + dbgs() << "New node not analyzed?\n"; else if (I->getNodeId() == Unanalyzed) - errs() << "Unanalyzed node not noticed?\n"; + dbgs() << "Unanalyzed node not noticed?\n"; else if (I->getNodeId() > 0) - errs() << "Operand not processed?\n"; + dbgs() << "Operand not processed?\n"; else if (I->getNodeId() == ReadyToProcess) - errs() << "Not added to worklist?\n"; + dbgs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); errs() << "\n"; + I->dump(&DAG); dbgs() << "\n"; llvm_unreachable(0); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c35f7ad..b5dbd41 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -509,6 +509,7 @@ private: void ScalarizeVectorResult(SDNode *N, unsigned OpNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); @@ -550,6 +551,7 @@ private: void SplitVectorResult(SDNode *N, unsigned OpNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void 
SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -615,6 +617,7 @@ private: SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); + SDValue WidenVecRes_InregOp(SDNode *N); // Widen Vector Operand. bool WidenVectorOperand(SDNode *N, unsigned ResNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 2625245..b5f84c0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -179,9 +179,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: - case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: + QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: QueryType = Node->getOperand(0).getValueType(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index cf67ab9..808bac7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -32,17 +32,17 @@ using namespace llvm; //===----------------------------------------------------------------------===// void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Scalarize node result " << ResNo << ": "; + DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); - errs() << "\n"); + dbgs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "ScalarizeVectorResult #" << ResNo << ": "; + dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; N->dump(&DAG); - errs() << "\n"; + dbgs() << "\n"; #endif llvm_unreachable("Do not know how to scalarize the result of this operator!"); @@ -50,11 +50,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; - case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -186,6 +187,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), 
N->getDebugLoc(), EltVT, + LHS, DAG.getValueType(ExtVT)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. @@ -196,13 +205,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } -SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) { - EVT EltVT = N->getValueType(0).getVectorElementType(); - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT, - LHS, N->getOperand(1)); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), @@ -278,18 +280,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(errs() << "Scalarize node operand " << OpNo << ": "; + DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); - errs() << "\n"); + dbgs() << "\n"); SDValue Res = SDValue(); if (Res.getNode() == 0) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; N->dump(&DAG); - errs() << "\n"; + dbgs() << "\n"; #endif llvm_unreachable("Do not know how to scalarize this operator's operand!"); case ISD::BIT_CONVERT: @@ -382,17 +384,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ /// legalization, we just know that (at least) one result needs vector /// splitting. 
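The recurring *_INREG pattern in the hunks above: SIGN_EXTEND_INREG and FP_ROUND_INREG carry their inner type as an explicit VT operand (operand 1, a VTSDNode), so the legalizer queries that operand rather than the result type, and the scalarized form takes that inner type's element type. A standalone sketch of the lookup, with SimpleVT/SimpleNode as toy stand-ins for EVT and SDNode:

    #include <cassert>
    #include <string>
    #include <vector>

    struct SimpleVT { std::string Name; };

    struct SimpleNode {
      unsigned Opcode;
      SimpleVT ResultVT;
      std::vector<SimpleVT> OperandVTs; // operand 1 holds the inreg type
    };

    enum { FADD = 0, SIGN_EXTEND_INREG = 1, FP_ROUND_INREG = 2 };

    static SimpleVT queryType(const SimpleNode &N) {
      switch (N.Opcode) {
      case SIGN_EXTEND_INREG:
      case FP_ROUND_INREG:
        return N.OperandVTs[1]; // the VT operand decides legality
      default:
        return N.ResultVT;
      }
    }

    int main() {
      SimpleNode N{SIGN_EXTEND_INREG, {"v4i32"}, {{"v4i32"}, {"v4i16"}}};
      assert(queryType(N).Name == "v4i16");
      return 0;
    }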
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Split node result: "; + DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); - errs() << "\n"); + dbgs() << "\n"); SDValue Lo, Hi; switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "SplitVectorResult #" << ResNo << ": "; + dbgs() << "SplitVectorResult #" << ResNo << ": "; N->dump(&DAG); - errs() << "\n"; + dbgs() << "\n"; #endif llvm_unreachable("Do not know how to split the result of this operator!"); @@ -406,10 +408,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; - case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; @@ -654,6 +657,21 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); } +void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + DebugLoc dl = N->getDebugLoc(); + + EVT LoVT, HiVT; + GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + DAG.getValueType(LoVT)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + DAG.getValueType(HiVT)); +} + void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -709,18 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, Hi = DAG.getUNDEF(HiVT); } -void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue LHSLo, LHSHi; - GetSplitVector(N->getOperand(0), LHSLo, LHSHi); - DebugLoc dl = N->getDebugLoc(); - - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, - N->getOperand(1)); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, - N->getOperand(1)); -} - void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); @@ -945,18 +951,18 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, /// result types of the node are known to be legal, but other operands of the /// node may need legalization as well as the specified one. 
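SplitVecRes_InregOp above splits the VT operand alongside the data operand, so each half-width *_INREG node gets a matching inner type. A toy sketch of the type halving, modeling GetSplitDestVTs as an even split (ToyVT is illustrative; LLVM's EVT carries much more):

    #include <cassert>
    #include <string>
    #include <utility>

    struct ToyVT {
      unsigned NumElts;
      std::string Elt;
    };

    static std::pair<ToyVT, ToyVT> getSplitDestVTs(ToyVT VT) {
      unsigned Lo = VT.NumElts / 2;
      unsigned Hi = VT.NumElts - Lo; // tolerate odd element counts
      return {ToyVT{Lo, VT.Elt}, ToyVT{Hi, VT.Elt}};
    }

    int main() {
      auto [LoVT, HiVT] = getSplitDestVTs(ToyVT{8, "i16"});
      assert(LoVT.NumElts == 4 && HiVT.NumElts == 4);
      // Each half then becomes DAG.getValueType(LoVT/HiVT) on one of the
      // two half-width nodes, as in the hunk above.
      return 0;
    }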
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(errs() << "Split node operand: "; + DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); - errs() << "\n"); + dbgs() << "\n"); SDValue Res = SDValue(); if (Res.getNode() == 0) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "SplitVectorOperand Op #" << OpNo << ": "; + dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; N->dump(&DAG); - errs() << "\n"; + dbgs() << "\n"; #endif llvm_unreachable("Do not know how to split this operator's operand!"); @@ -1136,9 +1142,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Widen node result " << ResNo << ": "; + DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); - errs() << "\n"); + dbgs() << "\n"); // See if the target wants to custom widen this node. if (CustomWidenLowerNode(N, N->getValueType(ResNo))) @@ -1148,9 +1154,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "WidenVectorResult #" << ResNo << ": "; + dbgs() << "WidenVectorResult #" << ResNo << ": "; N->dump(&DAG); - errs() << "\n"; + dbgs() << "\n"; #endif llvm_unreachable("Do not know how to widen the result of this operator!"); @@ -1159,10 +1165,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; - case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; @@ -1331,6 +1338,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); } +SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), + cast<VTSDNode>(N->getOperand(1))->getVT() + .getVectorElementType(), + WidenVT.getVectorNumElements()); + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + WidenVT, WidenLHS, DAG.getValueType(ExtVT)); +} + SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); @@ -1713,13 +1731,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { WidenVT, N->getOperand(0)); } -SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), - WidenVT, WidenLHS, N->getOperand(1)); -} - SDValue 
DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1806,17 +1817,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(errs() << "Widen node operand " << ResNo << ": "; + DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; N->dump(&DAG); - errs() << "\n"); + dbgs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - errs() << "WidenVectorOperand op #" << ResNo << ": "; + dbgs() << "WidenVectorOperand op #" << ResNo << ": "; N->dump(&DAG); - errs() << "\n"; + dbgs() << "\n"; #endif llvm_unreachable("Do not know how to widen this operator's operand!"); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 4045a34..0c3c974c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -109,7 +109,7 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGFast::Schedule() { - DEBUG(errs() << "********** List Scheduling **********\n"); + DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); @@ -136,9 +136,9 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { #ifndef NDEBUG if (PredSU->NumSuccsLeft == 0) { - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); - errs() << " has been released too many times!\n"; + dbgs() << " has been released too many times!\n"; llvm_unreachable(0); } #endif @@ -175,7 +175,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -233,7 +233,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -343,7 +343,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = Clone(SU); // New SUnit has the exact same predecessors. @@ -550,7 +550,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. 
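The "scheduling failed" diagnostics touched above guard a simple invariant: every scheduling unit tracks how many successors remain unscheduled, and releasing a predecessor whose count already hit zero means the dependence bookkeeping is corrupt. A toy sketch of that check (ToySU stands in for SUnit):

    #include <cstdio>
    #include <cstdlib>

    struct ToySU {
      int NodeNum;
      int NumSuccsLeft;
    };

    static void releasePred(ToySU &PredSU) {
      if (PredSU.NumSuccsLeft == 0) {
        std::fprintf(stderr, "*** Scheduling failed! ***\nSU(%d) has been "
                             "released too many times!\n", PredSU.NodeNum);
        std::abort();
      }
      --PredSU.NumSuccsLeft; // at zero the node becomes available
    }

    int main() {
      ToySU SU{7, 2};
      releasePred(SU);
      releasePred(SU); // count reaches zero; a third call would abort
      return 0;
    }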
SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum + DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -558,7 +558,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { NewDef = Copies.back(); } - DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum + DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index faf21f7..b92a672 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -87,7 +87,7 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { - DEBUG(errs() << "********** List Scheduling **********\n"); + DEBUG(dbgs() << "********** List Scheduling **********\n"); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -110,9 +110,9 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - errs() << " has been released too many times!\n"; + dbgs() << " has been released too many times!\n"; llvm_unreachable(0); } #endif @@ -141,7 +141,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -233,7 +233,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. - DEBUG(errs() << "*** Advancing cycle, no work to do\n"); + DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); HazardRec->AdvanceCycle(); ++NumStalls; ++CurCycle; @@ -241,7 +241,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. - DEBUG(errs() << "*** Emitting noop\n"); + DEBUG(dbgs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); Sequence.push_back(0); // NULL here means noop ++NumNoops; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 7e1015a..1ad7919 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -164,7 +164,7 @@ private: /// Schedule - Schedule the DAG using list scheduling. 
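The ListScheduleTopDown hunk above separates two no-issue cases: a plain pipeline stall just advances the cycle, while a machine without interlocks must emit an explicit noop. A skeleton of that decision, with toy types (nullptr in Sequence means noop, as the code's own comment says):

    #include <cstdio>
    #include <vector>

    struct ToySU { int Id; };

    static void handleNoIssue(bool HasNoopHazards, unsigned &CurCycle,
                              std::vector<ToySU *> &Sequence) {
      if (!HasNoopHazards) {
        std::puts("*** Advancing cycle, no work to do");
        ++CurCycle;                  // stall only: retry next cycle
      } else {
        std::puts("*** Emitting noop");
        Sequence.push_back(nullptr); // keep the pipeline safe explicitly
      }
    }

    int main() {
      std::vector<ToySU *> Sequence;
      unsigned CurCycle = 0;
      handleNoIssue(false, CurCycle, Sequence); // stall case
      handleNoIssue(true, CurCycle, Sequence);  // noop case
      return 0;
    }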
void ScheduleDAGRRList::Schedule() { - DEBUG(errs() << "********** List Scheduling **********\n"); + DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); @@ -199,9 +199,9 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #ifndef NDEBUG if (PredSU->NumSuccsLeft == 0) { - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); - errs() << " has been released too many times!\n"; + dbgs() << " has been released too many times!\n"; llvm_unreachable(0); } #endif @@ -238,7 +238,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -284,7 +284,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: "); + DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); AvailableQueue->UnscheduledNode(SU); @@ -371,7 +371,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -490,7 +490,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -771,7 +771,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum + DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -780,7 +780,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { NewDef = Copies.back(); } - DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum + DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, @@ -827,9 +827,9 @@ void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { - errs() << "*** Scheduling failed! ***\n"; + dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - errs() << " has been released too many times!\n"; + dbgs() << " has been released too many times!\n"; llvm_unreachable(0); } #endif @@ -858,7 +858,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. 
If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -1038,6 +1038,10 @@ namespace { return 0; return SethiUllmanNumbers[SU->NodeNum]; } + + unsigned getNodeOrdering(const SUnit *SU) const { + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } unsigned size() const { return Queue.size(); } @@ -1120,6 +1124,14 @@ static unsigned calcMaxScratches(const SUnit *SU) { // Bottom up bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); if (LPriority != RPriority) @@ -1329,7 +1341,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. - DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum + DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum << " next to PredSU # " << PredSU->NodeNum << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { @@ -1419,7 +1431,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { - DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # " + DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d53de34..aaaa2b3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -253,19 +253,19 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { if (!SU->getNode()) { - errs() << "PHYS REG COPY\n"; + dbgs() << "PHYS REG COPY\n"; return; } SU->getNode()->dump(DAG); - errs() << "\n"; + dbgs() << "\n"; SmallVector<SDNode *, 4> FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { - errs() << " "; + dbgs() << " "; FlaggedNodes.back()->dump(DAG); - errs() << "\n"; + dbgs() << "\n"; FlaggedNodes.pop_back(); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 77301b0..cb1a0d6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" @@ -644,7 
+645,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); - errs() << "\n"; + dbgs() << "\n"; llvm_unreachable("Node is not in map!"); } #endif @@ -1740,7 +1741,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - unsigned EBits = EVT.getSizeInBits(); + unsigned EBits = EVT.getScalarType().getSizeInBits(); // Sign extension. Compute the demanded bits in the result that are not // present in the input. @@ -1785,7 +1786,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ISD::isZEXTLoad(Op.getNode())) { LoadSDNode *LD = cast<LoadSDNode>(Op); EVT VT = LD->getMemoryVT(); - unsigned MemBits = VT.getSizeInBits(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; } return; @@ -2024,7 +2025,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ case ISD::SIGN_EXTEND_INREG: // Max of the input and what this extends. - Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + Tmp = + cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits(); Tmp = VTBits-Tmp+1; Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); @@ -2168,10 +2170,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ switch (ExtType) { default: break; case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); return VTBits-Tmp+1; case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); return VTBits-Tmp; } } @@ -2655,12 +2657,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // size of the value, the shift/rotate count is guaranteed to be zero. if (VT == MVT::i1) return N1; + if (N2C && N2C->isNullValue()) + return N1; break; case ISD::FP_ROUND_INREG: { EVT EVT = cast<VTSDNode>(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg round!"); assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && "Cannot FP_ROUND_INREG integer types"); + assert(EVT.isVector() == VT.isVector() && + "FP_ROUND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in FP_ROUND_INREG"); assert(EVT.bitsLE(VT) && "Not rounding down!"); if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. 
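The SIGN_EXTEND_INREG constant fold in the getNode hunk just below uses the classic shift pair: left-shift so the narrow type's sign bit lands in the container's top bit, then arithmetic-shift back down. A standalone sketch at a fixed 64-bit width, with uint64_t/int64_t standing in for APInt (requires 0 < FromBits <= 64; the signed cast assumes two's complement, which APInt's ashr guarantees):

    #include <cassert>
    #include <cstdint>

    static int64_t signExtendInReg(uint64_t Val, unsigned FromBits) {
      const unsigned BitWidth = 64;
      Val <<= (BitWidth - FromBits); // narrow sign bit up to bit 63
      return static_cast<int64_t>(Val) >> (BitWidth - FromBits); // ashr
    }

    int main() {
      assert(signExtendInReg(0xFF, 8) == -1);        // i8 -1 extends to -1
      assert(signExtendInReg(0x7F, 8) == 127);       // positives unchanged
      assert(signExtendInReg(0x8000, 16) == -32768); // i16 INT_MIN
      return 0;
    }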
break; @@ -2690,15 +2700,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); - assert(!EVT.isVector() && - "SIGN_EXTEND_INREG type should be the vector element type rather " - "than the vector type!"); - assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); + assert(EVT.isVector() == VT.isVector() && + "SIGN_EXTEND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in SIGN_EXTEND_INREG"); + assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending if (N1C) { APInt Val = N1C->getAPIntValue(); - unsigned FromBits = EVT.getSizeInBits(); + unsigned FromBits = EVT.getScalarType().getSizeInBits(); Val <<= Val.getBitWidth()-FromBits; Val = Val.ashr(Val.getBitWidth()-FromBits); return getConstant(Val, VT); @@ -4106,7 +4119,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { // If the and is only masking out bits that cannot effect the shift, // eliminate the and. - unsigned NumBits = VT.getSizeInBits()*2; + unsigned NumBits = VT.getScalarType().getSizeInBits()*2; if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } @@ -5713,7 +5726,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() { void SDNode::dump() const { dump(0); } void SDNode::dump(const SelectionDAG *G) const { - print(errs(), G); + print(dbgs(), G); } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { @@ -5885,12 +5898,12 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { if (N->getOperand(i).getNode()->hasOneUse()) DumpNodes(N->getOperand(i).getNode(), indent+2, G); else - errs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - errs() << "\n"; - errs().indent(indent); + dbgs() << "\n"; + dbgs().indent(indent); N->dump(G); } @@ -5943,6 +5956,13 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], getShiftAmountOperand(Operands[1]))); break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: { + EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + Operands[0], + getValueType(ExtVT))); + } } } @@ -6048,7 +6068,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { } void SelectionDAG::dump() const { - errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { @@ -6059,7 +6079,7 @@ void SelectionDAG::dump() const { if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - errs() << "\n\n"; + dbgs() << "\n\n"; } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { @@ -6106,12 +6126,12 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, void SDNode::dumpr() const { VisitedSDNodeSet once; - DumpNodesr(errs(), this, 0, 0, once); + DumpNodesr(dbgs(), 
this, 0, 0, once); } void SDNode::dumpr(const SelectionDAG *G) const { VisitedSDNodeSet once; - DumpNodesr(errs(), this, 0, G, once); + DumpNodesr(dbgs(), this, 0, G, once); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 74d624f..5e3a3b5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1195,6 +1195,18 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ return false; } + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].CC == Cases[1].CC && + isa<Constant>(Cases[0].CmpRHS) && + cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + return true; } @@ -1733,7 +1745,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, if (Density < 0.4) return false; - DEBUG(errs() << "Lowering jump table\n" + DEBUG(dbgs() << "Lowering jump table\n" << "First entry: " << First << ". Last entry: " << Last << '\n' << "Range: " << Range << "Size: " << TSize << ". Density: " << Density << "\n\n"); @@ -1837,7 +1849,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt LSize = FrontCase.size(); APInt RSize = TSize-LSize; - DEBUG(errs() << "Selecting best pivot: \n" + DEBUG(dbgs() << "Selecting best pivot: \n" << "First: " << First << ", Last: " << Last <<'\n' << "LSize: " << LSize << ", RSize: " << RSize << '\n'); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; @@ -1853,7 +1865,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place - DEBUG(errs() <<"=>Step\n" + DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' << "LDensity: " << LDensity << ", RDensity: " << RDensity << '\n' @@ -1861,7 +1873,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, if (FMetric < Metric) { Pivot = J; FMetric = Metric; - DEBUG(errs() << "Current metric set to: " << FMetric << '\n'); + DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); } LSize += J->size(); @@ -1965,7 +1977,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Don't bother the code below, if there are too much unique destinations return false; } - DEBUG(errs() << "Total number of unique destinations: " + DEBUG(dbgs() << "Total number of unique destinations: " << Dests.size() << '\n' << "Total number of comparisons: " << numCmps << '\n'); @@ -1974,7 +1986,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); APInt cmpRange = maxValue - minValue; - DEBUG(errs() << "Compare range: " << cmpRange << '\n' + DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' << "Low bound: " << minValue << '\n' << "High bound: " << maxValue << '\n'); @@ -1984,7 +1996,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, !(Dests.size() >= 3 && numCmps >= 6))) return false; - DEBUG(errs() << "Emitting bit tests\n"); + DEBUG(dbgs() << "Emitting bit tests\n"); APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); // Optimize the case where all the case values fit in a 
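The new ShouldEmitAsBranches early-out above keys on the identity its comment states: two compares against null feeding one destination collapse into a single compare of the OR of the operands. The identity itself, as a standalone sketch:

    #include <cassert>
    #include <cstdint>

    static bool eitherNonNull(uint64_t X, uint64_t Y) {
      return (X | Y) != 0; // == (X != 0) | (Y != 0)
    }

    static bool bothNull(uint64_t X, uint64_t Y) {
      return (X | Y) == 0; // == (X == 0) & (Y == 0)
    }

    int main() {
      assert(eitherNonNull(0, 4) && !eitherNonNull(0, 0));
      assert(bothNull(0, 0) && !bothNull(1, 0));
      return 0;
    }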
@@ -2034,9 +2046,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - DEBUG(errs() << "Cases:\n"); + DEBUG(dbgs() << "Cases:\n"); for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { - DEBUG(errs() << "Mask: " << CasesBits[i].Mask + DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask << ", Bits: " << CasesBits[i].Bits << ", BB: " << CasesBits[i].BB << '\n'); @@ -2135,7 +2147,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // create a binary search tree from them. CaseVector Cases; size_t numCmps = Clusterify(Cases, SI); - DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() << ". Total compares: " << numCmps << '\n'); numCmps = 0; @@ -3157,7 +3169,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); - } else if (I.getType() != Type::getVoidTy(*DAG.getContext())) { + } else if (!I.getType()->isVoidTy()) { Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); } else { @@ -3176,7 +3188,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, DAG.setRoot(Chain); } - if (I.getType() != Type::getVoidTy(*DAG.getContext())) { + if (!I.getType()->isVoidTy()) { if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); @@ -4406,12 +4418,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } - case Intrinsic::dbg_stoppoint: - case Intrinsic::dbg_region_start: - case Intrinsic::dbg_region_end: - case Intrinsic::dbg_func_start: - // FIXME - Remove this instructions once the dust settles. - return 0; case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. @@ -5931,7 +5937,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // The return value of the call is this value. As such, there is no // corresponding argument. - assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); @@ -6056,7 +6062,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { std::vector<SDValue> AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( - DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other)); + DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the @@ -6100,8 +6107,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { OpInfo.CallOperandVal)); } else { // This is the result value of the call. - assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && - "Bad inline asm!"); + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); // Concatenate this output onto the outputs list. 
RetValRegs.append(OpInfo.AssignedRegs); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 88a2017..db656e3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -163,7 +163,7 @@ private: /// The comparison function for sorting the switch case values in the vector. /// WARNING: Case ranges should be disjoint! struct CaseCmp { - bool operator () (const Case& C1, const Case& C2) { + bool operator()(const Case &C1, const Case &C2) { assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); @@ -172,12 +172,12 @@ private: }; struct CaseBitsCmp { - bool operator () (const CaseBits& C1, const CaseBits& C2) { + bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; } }; - size_t Clusterify(CaseVector& Cases, const SwitchInst &SI); + size_t Clusterify(CaseVector &Cases, const SwitchInst &SI); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -215,7 +215,7 @@ private: MachineBasicBlock *Default; }; struct JumpTableHeader { - JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H, + JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H, bool E = false): First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} APInt First; @@ -230,8 +230,8 @@ private: BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): Mask(M), ThisBB(T), TargetBB(Tr) { } uint64_t Mask; - MachineBasicBlock* ThisBB; - MachineBasicBlock* TargetBB; + MachineBasicBlock *ThisBB; + MachineBasicBlock *TargetBB; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 05669c0..9ac8f83 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -162,7 +162,7 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { #ifndef NDEBUG - errs() << "If a target marks an instruction with " + dbgs() << "If a target marks an instruction with " "'usesCustomInserter', it must implement " "TargetLowering::EmitInstrWithCustomInserter!"; #endif @@ -325,7 +325,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else GFI = 0; RegInfo = &MF->getRegInfo(); - DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n"); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); @@ -438,6 +438,95 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, SDB->clear(); } +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener { + SmallVector<SDNode*, 128> &Worklist; +public: + SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl) : Worklist(wl) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N), + Worklist.end()); + } + + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. 
+ } +}; +} + +/// ShrinkDemandedOps - A late transformation pass that shrink expressions +/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts +/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. +void SelectionDAGISel::ShrinkDemandedOps() { + SmallVector<SDNode*, 128> Worklist; + + // Add all the dag nodes to the worklist. + Worklist.reserve(CurDAG->allnodes_size()); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) + Worklist.push_back(I); + + APInt Mask; + APInt KnownZero; + APInt KnownOne; + + TargetLowering::TargetLoweringOpt TLO(*CurDAG, true); + while (!Worklist.empty()) { + SDNode *N = Worklist.pop_back_val(); + + if (N->use_empty() && N != CurDAG->getRoot().getNode()) { + CurDAG->DeleteNode(N); + continue; + } + + // Run ShrinkDemandedOp on scalar binary operations. + if (N->getNumValues() == 1 && + N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) { + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); + APInt KnownZero, KnownOne; + if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, + KnownZero, KnownOne, TLO)) { + // Revisit the node. + Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N), + Worklist.end()); + Worklist.push_back(N); + + // Replace the old value with the new one. + DEBUG(errs() << "\nReplacing "; + TLO.Old.getNode()->dump(CurDAG); + errs() << "\nWith: "; + TLO.New.getNode()->dump(CurDAG); + errs() << '\n'); + + Worklist.push_back(TLO.New.getNode()); + + SDOPsWorkListRemover DeadNodes(Worklist); + CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + + if (TLO.Old.getNode()->use_empty()) { + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); + i != e; ++i) { + SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); + if (OpNode->hasOneUse()) { + Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), + OpNode), Worklist.end()); + Worklist.push_back(OpNode); + } + } + + Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), + TLO.Old.getNode()), Worklist.end()); + CurDAG->DeleteNode(TLO.Old.getNode()); + } + } + } + } +} + void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet<SDNode*, 128> VisitedNodes; SmallVector<SDNode*, 128> Worklist; @@ -448,9 +537,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { APInt KnownZero; APInt KnownOne; - while (!Worklist.empty()) { - SDNode *N = Worklist.back(); - Worklist.pop_back(); + do { + SDNode *N = Worklist.pop_back_val(); // If we've already seen this node, ignore it. 
if (!VisitedNodes.insert(N)) @@ -490,7 +578,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { LOI.KnownOne = KnownOne; LOI.KnownZero = KnownZero; } - } + } while (!Worklist.empty()); } void SelectionDAGISel::CodeGenAndEmitDAG() { @@ -504,7 +592,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = MF->getFunction()->getNameStr() + ":" + BB->getBasicBlock()->getNameStr(); - DEBUG(errs() << "Initial selection DAG:\n"); + DEBUG(dbgs() << "Initial selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -517,7 +605,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(errs() << "Optimized lowered selection DAG:\n"); + DEBUG(dbgs() << "Optimized lowered selection DAG:\n"); DEBUG(CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that @@ -533,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(errs() << "Type-legalized selection DAG:\n"); + DEBUG(dbgs() << "Type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (Changed) { @@ -548,7 +636,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); + DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } @@ -578,7 +666,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } @@ -591,7 +679,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(OptLevel); } - DEBUG(errs() << "Legalized selection DAG:\n"); + DEBUG(dbgs() << "Legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -604,13 +692,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(errs() << "Optimized legalized selection DAG:\n"); + DEBUG(dbgs() << "Optimized legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { + ShrinkDemandedOps(); ComputeLiveOutVRegInfo(); + } // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. 
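ShrinkDemandedOps above is a worklist algorithm whose nodes can be deleted out from under it; SDOPsWorkListRemover listens for deletions and prunes every stale pointer with the erase/remove idiom before it can be popped. A toy version of that bookkeeping (ToyNode/ToyWorkListRemover stand in for SDNode and the DAGUpdateListener):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct ToyNode { int Id; };

    class ToyWorkListRemover {
      std::vector<ToyNode *> &Worklist;
    public:
      explicit ToyWorkListRemover(std::vector<ToyNode *> &WL) : Worklist(WL) {}
      void nodeDeleted(ToyNode *N) {
        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
                       Worklist.end());
      }
    };

    int main() {
      ToyNode A{0}, B{1};
      std::vector<ToyNode *> WL{&A, &B, &A};
      ToyWorkListRemover R(WL);
      R.nodeDeleted(&A); // removes *all* occurrences, not just the first
      assert(WL.size() == 1 && WL[0] == &B);
      return 0;
    }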
@@ -621,7 +711,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { InstructionSelect(); } - DEBUG(errs() << "Selected selection DAG:\n"); + DEBUG(dbgs() << "Selected selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -654,7 +744,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DEBUG(errs() << "Selected machine code:\n"); + DEBUG(dbgs() << "Selected machine code:\n"); DEBUG(BB->dump()); } @@ -699,7 +789,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, I != E; ++I, ++j) if (Fn.paramHasAttr(j, Attribute::ByVal)) { if (EnableFastISelVerbose || EnableFastISelAbort) - errs() << "FastISel skips entry block due to byval argument\n"; + dbgs() << "FastISel skips entry block due to byval argument\n"; SuppressFastISel = true; break; } @@ -729,10 +819,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // information is provided by an intrinsic (eh.selector) that can be moved // to unexpected places by the optimizers: if the unwind edge is critical, // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results in exceptions - // not being caught because no typeids are associated with the invoke. - // This may not be the only way things can go wrong, but it is the only way - // we try to work around for the moment. + // the landing pad, not the landing pad itself. This results + // in exceptions not being caught because no typeids are associated with + // the invoke. This may not be the only way things can go wrong, but it + // is the only way we try to work around for the moment. BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); if (Br && Br->isUnconditional()) { // Critical edge? @@ -765,7 +855,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { ResetDebugLoc(SDB, FastIS); if (EnableFastISelVerbose || EnableFastISelAbort) { - errs() << "FastISel miss: "; + dbgs() << "FastISel miss: "; BI->dump(); } assert(!EnableFastISelAbort && @@ -775,7 +865,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF); - // First try normal tablegen-generated "fast" selection. + // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(BI)) { ResetDebugLoc(SDB, FastIS); continue; @@ -788,11 +878,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - errs() << "FastISel missed call: "; + dbgs() << "FastISel missed call: "; BI->dump(); } - if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) { + if (!BI->getType()->isVoidTy()) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); @@ -817,7 +907,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // For now, be a little lenient about non-branch terminators. 
if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - errs() << "FastISel miss: "; + dbgs() << "FastISel miss: "; BI->dump(); } if (EnableFastISelAbort) @@ -846,13 +936,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, void SelectionDAGISel::FinishBasicBlock() { - DEBUG(errs() << "Target-post-processed machine code:\n"); + DEBUG(dbgs() << "Target-post-processed machine code:\n"); DEBUG(BB->dump()); - DEBUG(errs() << "Total amount of phi nodes to update: " + DEBUG(dbgs() << "Total amount of phi nodes to update: " << SDB->PHINodesToUpdate.size() << "\n"); DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) - errs() << "Node " << i << " : (" + dbgs() << "Node " << i << " : (" << SDB->PHINodesToUpdate[i].first << ", " << SDB->PHINodesToUpdate[i].second << ")\n"); @@ -915,11 +1005,11 @@ SelectionDAGISel::FinishBasicBlock() { // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB. if (PHIBB == SDB->BitTestCases[i].Default) { - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, - false)); + PHI->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, - false)); + PHI->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. back().ThisBB)); } @@ -927,10 +1017,9 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; - if (cBB->succ_end() != - std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, - false)); + if (cBB->isSuccessor(PHIBB)) { + PHI->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(cBB)); } } @@ -977,7 +1066,7 @@ SelectionDAGISel::FinishBasicBlock() { (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here - if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { + if (BB->isSuccessor(PHIBB)) { PHI->addOperand (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); @@ -1023,17 +1112,23 @@ SelectionDAGISel::FinishBasicBlock() { SDB->EdgeMapping.find(BB); if (EI != SDB->EdgeMapping.end()) ThisBB = EI->second; - for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ - // This value for this PHI node is recorded in PHINodesToUpdate, get it. - for (unsigned pn = 0; ; ++pn) { - assert(pn != SDB->PHINodesToUpdate.size() && - "Didn't find PHI entry!"); - if (SDB->PHINodesToUpdate[pn].first == Phi) { - Phi->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pn]. - second, false)); - Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); - break; + + // BB may have been removed from the CFG if a branch was constant folded. + if (ThisBB->isSuccessor(BB)) { + for (MachineBasicBlock::iterator Phi = BB->begin(); + Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; + ++Phi) { + // This value for this PHI node is recorded in PHINodesToUpdate. 
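Several hunks here replace std::find over succ_begin()/succ_end() with MachineBasicBlock::isSuccessor(), turning an iterator comparison into a named membership query. A toy equivalent (ToyMBB stands in for MachineBasicBlock):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct ToyMBB {
      std::vector<ToyMBB *> Successors;
      bool isSuccessor(const ToyMBB *MBB) const {
        return std::find(Successors.begin(), Successors.end(), MBB) !=
               Successors.end();
      }
    };

    int main() {
      ToyMBB A, B, C;
      A.Successors = {&B};
      assert(A.isSuccessor(&B) && !A.isSuccessor(&C));
      return 0;
    }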
+ for (unsigned pn = 0; ; ++pn) { + assert(pn != SDB->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (SDB->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand:: + CreateReg(SDB->PHINodesToUpdate[pn].second, + false)); + Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); + break; + } } } } @@ -1302,45 +1397,47 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, return !isNonImmUse(Root, N, U); } -SDNode *SelectionDAGISel::Select_INLINEASM(SDValue N) { - std::vector<SDValue> Ops(N.getNode()->op_begin(), N.getNode()->op_end()); +SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); std::vector<EVT> VTs; VTs.push_back(MVT::Other); VTs.push_back(MVT::Flag); - SDValue New = CurDAG->getNode(ISD::INLINEASM, N.getDebugLoc(), + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); return New.getNode(); } -SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) { - return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::IMPLICIT_DEF, - N.getValueType()); +SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { + return CurDAG->SelectNodeTo(N, TargetInstrInfo::IMPLICIT_DEF, + N->getValueType(0)); } -SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) { - SDValue Chain = N.getOperand(0); +SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) { + SDValue Chain = N->getOperand(0); unsigned C = cast<LabelSDNode>(N)->getLabelID(); SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32); - return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::EH_LABEL, + return CurDAG->SelectNodeTo(N, TargetInstrInfo::EH_LABEL, MVT::Other, Tmp, Chain); } -void SelectionDAGISel::CannotYetSelect(SDValue N) { +void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot yet select: "; - N.getNode()->print(Msg, CurDAG); + N->print(Msg, CurDAG); llvm_report_error(Msg.str()); } -void SelectionDAGISel::CannotYetSelectIntrinsic(SDValue N) { - errs() << "Cannot yet select: "; +void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) { + dbgs() << "Cannot yet select: "; unsigned iid = - cast<ConstantSDNode>(N.getOperand(N.getOperand(0).getValueType() == MVT::Other))->getZExtValue(); + cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() == + MVT::Other))->getZExtValue(); if (iid < Intrinsic::num_intrinsics) - llvm_report_error("Cannot yet select: intrinsic %" + Intrinsic::getName((Intrinsic::ID)iid)); + llvm_report_error("Cannot yet select: intrinsic %" + + Intrinsic::getName((Intrinsic::ID)iid)); else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo()) llvm_report_error(Twine("Cannot yet select: target intrinsic %") + tii->getName(iid)); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 83fa5a8..3786bd1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -225,7 +225,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet if (level >= 20) { if (!printed) { printed = true; - DEBUG(errs() << "setSubgraphColor hit max level\n"); + DEBUG(dbgs() << "setSubgraphColor hit max level\n"); } return true; } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d9a5a13..81c51c4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ 
b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -990,7 +990,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -1024,7 +1024,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1049,7 +1049,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1272,19 +1272,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits()) & - NewMask; + APInt NewBits = + APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getScalarType().getSizeInBits()) & + NewMask; // If none of the extended bits are demanded, eliminate the sextinreg. if (NewBits == 0) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits()); + APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); InSignBit.zext(BitWidth); - APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getSizeInBits()) & - NewMask; + APInt InputDemandedBits = + APInt::getLowBitsSet(BitWidth, + EVT.getScalarType().getSizeInBits()) & + NewMask; // Since the sign extended bits are demanded, we know that the sign // bit is demanded. 
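The TargetLowering.cpp hunks above make two separable changes: ShrinkDemandedOp is now gated behind a TLO.ShrinkOps flag, and the SIGN_EXTEND_INREG mask computations use EVT.getScalarType().getSizeInBits() rather than EVT.getSizeInBits(), so that for vector types the demanded-bit masks are built from the element width rather than the whole vector's width. A minimal standalone C++ sketch of the per-element mask computation (plain integers standing in for APInt; the helper names are invented for illustration and are not LLVM APIs):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Stand-ins for APInt::getLowBitsSet / getHighBitsSet on one element.
static uint64_t lowBitsSet(unsigned width, unsigned count) {
  assert(count <= width && width <= 64);
  return count == 64 ? ~0ULL : ((1ULL << count) - 1);
}
static uint64_t highBitsSet(unsigned width, unsigned count) {
  assert(count <= width && width <= 64);
  // Full-width mask XOR the low (width - count) bits = the high count bits.
  return lowBitsSet(width, width) ^ lowBitsSet(width, width - count);
}

int main() {
  // E.g. a <4 x i32> sign_extend_inreg from i16: the masks must come from
  // the 32-bit element width, not the 128-bit vector width.
  unsigned BitWidth = 32;   // scalar element width
  unsigned EVTBits  = 16;   // width being sign-extended from
  uint64_t NewMask  = lowBitsSet(BitWidth, BitWidth); // all bits demanded

  uint64_t NewBits           = highBitsSet(BitWidth, BitWidth - EVTBits) & NewMask;
  uint64_t InSignBit         = 1ULL << (EVTBits - 1);
  uint64_t InputDemandedBits = (lowBitsSet(BitWidth, EVTBits) & NewMask) | InSignBit;

  printf("NewBits=%#llx InSignBit=%#llx InputDemanded=%#llx\n",
         (unsigned long long)NewBits, (unsigned long long)InSignBit,
         (unsigned long long)InputDemandedBits);
  return 0;
}

For a 32-bit element sign-extended in-register from 16 bits this prints NewBits=0xffff0000, InSignBit=0x8000, InputDemanded=0xffff; building the same masks from the full vector width would be meaningless, which is what the getScalarType() change avoids.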
@@ -1313,7 +1315,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ZERO_EXTEND: { - unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); APInt InMask = NewMask; InMask.trunc(OperandBitWidth); @@ -1336,7 +1339,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); APInt NewBits = ~InMask & NewMask; @@ -1376,7 +1379,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ANY_EXTEND: { - unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); APInt InMask = NewMask; InMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, @@ -1480,7 +1484,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH @@ -1597,7 +1601,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && N0.getOperand(0).getOpcode() == ISD::CTLZ && N0.getOperand(1).getOpcode() == ISD::Constant) { - unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + const APInt &ShAmt + = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { if ((C1 == 0) == (Cond == ISD::SETEQ)) { @@ -1625,27 +1630,26 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getNode()->hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); - uint64_t bestMask = 0; + APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed() && - // FIXME: This uses getZExtValue() below so it only works on i64 and - // below. - N0.getValueType().getSizeInBits() <= 64) { + if (!Lod->isVolatile() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueType().getSizeInBits(); + unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to // 8 bits, but have to be careful... 
if (Lod->getExtensionType() != ISD::NON_EXTLOAD) origWidth = Lod->getMemoryVT().getSizeInBits(); - uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + const APInt &Mask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); for (unsigned width = origWidth / 2; width>=8; width /= 2) { - uint64_t newMask = (1ULL << width) - 1; + APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if ((newMask & Mask) == Mask) { if (!TD->isLittleEndian()) bestOffset = (origWidth/width - offset - 1) * (width/8); else bestOffset = (uint64_t)offset * (width/8); - bestMask = Mask >> (offset * (width/8) * 8); + bestMask = Mask.lshr(offset * (width/8) * 8); bestWidth = width; break; } @@ -1668,7 +1672,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, - DAG.getConstant(bestMask, newVT)), + DAG.getConstant(bestMask.trunc(bestWidth), + newVT)), DAG.getConstant(0LL, newVT), Cond); } } @@ -1760,7 +1765,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) return N0; @@ -1876,24 +1881,27 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + (VT == N0.getValueType() || + (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. - if (isPowerOf2_64(AndRHS->getZExtValue())) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(Log2_64(AndRHS->getZExtValue()), - ShiftTy)); + if (AndRHS->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy))); } - } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. 
if (C1.isPowerOf2()) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(C1.logBase2(), ShiftTy)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(C1.logBase2(), ShiftTy))); } } } diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 8070570..aeaa38b 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -185,7 +185,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { initShrinkWrappingInfo(); DEBUG(if (ShrinkWrapThisFunction) { - errs() << "Place CSR spills/restores for " + dbgs() << "Place CSR spills/restores for " << MF->getFunction()->getName() << "\n"; }); @@ -299,7 +299,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { DEBUG({ if (ShrinkWrapDebugging >= Details) { - errs() + dbgs() << "-----------------------------------------------------------\n" << " Antic/Avail Sets:\n" << "-----------------------------------------------------------\n" @@ -314,7 +314,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { dumpSets(MBB); } - errs() + dbgs() << "-----------------------------------------------------------\n"; } }); @@ -363,7 +363,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - errs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": uses no callee-saved registers\n"); return false; } @@ -383,7 +383,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - errs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -465,7 +465,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { @@ -477,7 +477,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } @@ -504,7 +504,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in choke point(s) at " << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; @@ -520,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - errs() << "ENABLED: " << Fn.getFunction()->getName(); + dbgs() << "ENABLED: " << Fn.getFunction()->getName(); if (HasFastExitPath) - errs() << " (fast exit path)"; - errs() << "\n"; + dbgs() << " (fast exit path)"; + dbgs() << "\n"; if (ShrinkWrapDebugging >= BasicInfo) { - errs() << "------------------------------" + dbgs() << "------------------------------" << "-----------------------------\n"; - errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + dbgs() << "UsedCSRegs = " << 
stringifyCSRegSet(UsedCSRegs) << "\n"; if (ShrinkWrapDebugging >= Details) { - errs() << "------------------------------" + dbgs() << "------------------------------" << "-----------------------------\n"; dumpAllUsed(); } @@ -602,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(SUCC); DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << getBasicBlockName(MBB) + dbgs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "successor " << getBasicBlockName(SUCC) << "\n"); } @@ -618,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(PRED); DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << getBasicBlockName(MBB) + dbgs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "predecessor " << getBasicBlockName(PRED) << "\n"); } @@ -656,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { CSRUsed[EXB] |= loopSpills; addedUses = true; DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << "LOOP " << getBasicBlockName(MBB) + dbgs() << "LOOP " << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(loopSpills) << ")->" << getBasicBlockName(EXB) << "\n"); if (EXB->succ_size() > 1 || EXB->pred_size() > 1) @@ -723,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << "\n"); return placedSpills; @@ -784,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " + dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); return placedRestores; @@ -808,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { ++iterations; DEBUG(if (ShrinkWrapDebugging >= Iterations) - errs() << "iter " << iterations + dbgs() << "iter " << iterations << " --------------------------------------------------\n"); // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, @@ -858,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); numSRReduced += numSRReducedThisFunc; DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - errs() << "-----------------------------------------------------------\n"; - errs() << "total iterations = " << iterations << " ( " + dbgs() << "-----------------------------------------------------------\n"; + dbgs() << "total iterations = " << iterations << " ( " << Fn.getFunction()->getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; - errs() << "-----------------------------------------------------------\n"; + dbgs() << "-----------------------------------------------------------\n"; dumpSRSets(); - errs() << "-----------------------------------------------------------\n"; + dbgs() << "-----------------------------------------------------------\n"; if (numSRReducedThisFunc) verifySpillRestorePlacement(); }); @@ -899,7 +899,7 @@ void PEI::findFastExitPath() { // Check the immediate successors. 
if (isReturnBlock(SUCC)) { if (ShrinkWrapDebugging >= BasicInfo) - errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) + dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << getBasicBlockName(SUCC) << "\n"; break; } @@ -917,7 +917,7 @@ void PEI::findFastExitPath() { } if (HasFastExitPath) { if (ShrinkWrapDebugging >= BasicInfo) - errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) + dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << exitPath << "\n"; break; } @@ -951,7 +951,7 @@ void PEI::verifySpillRestorePlacement() { if (spilled.empty()) continue; - DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(spilled) << " RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); @@ -983,7 +983,7 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DEBUG(errs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getFunction()->getName() << ": " << stringifyCSRegSet(notRestored) << " spilled at " << getBasicBlockName(MBB) << " are never restored on path to return " @@ -1004,7 +1004,7 @@ void PEI::verifySpillRestorePlacement() { if (restored.empty()) continue; - DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << " RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(restored) << "\n"); @@ -1031,7 +1031,7 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DEBUG(errs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getFunction()->getName() << ": " << stringifyCSRegSet(notSpilled) << " restored at " << getBasicBlockName(MBB) << " are never spilled\n"); @@ -1078,13 +1078,13 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) { } void PEI::dumpSet(const CSRegSet& s) { - DEBUG(errs() << stringifyCSRegSet(s) << "\n"); + DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); } void PEI::dumpUsed(MachineBasicBlock* MBB) { DEBUG({ if (MBB) - errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; }); } @@ -1100,7 +1100,7 @@ void PEI::dumpAllUsed() { void PEI::dumpSets(MachineBasicBlock* MBB) { DEBUG({ if (MBB) - errs() << getBasicBlockName(MBB) << " | " + dbgs() << getBasicBlockName(MBB) << " | " << stringifyCSRegSet(CSRUsed[MBB]) << " | " << stringifyCSRegSet(AnticIn[MBB]) << " | " << stringifyCSRegSet(AnticOut[MBB]) << " | " @@ -1112,7 +1112,7 @@ void PEI::dumpSets(MachineBasicBlock* MBB) { void PEI::dumpSets1(MachineBasicBlock* MBB) { DEBUG({ if (MBB) - errs() << getBasicBlockName(MBB) << " | " + dbgs() << getBasicBlockName(MBB) << " | " << stringifyCSRegSet(CSRUsed[MBB]) << " | " << stringifyCSRegSet(AnticIn[MBB]) << " | " << stringifyCSRegSet(AnticOut[MBB]) << " | " @@ -1136,14 +1136,14 @@ void PEI::dumpSRSets() { for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); MBB != E; ++MBB) { if (!CSRSave[MBB].empty()) { - errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]); if (CSRRestore[MBB].empty()) - errs() << '\n'; + dbgs() << '\n'; } if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) 
- errs() << " " + dbgs() << " " << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 6314331..27d429b 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -183,16 +183,16 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; } } DEBUG({ - errs() << "\nExtending: "; - IntB.print(errs(), tri_); + dbgs() << "\nExtending: "; + IntB.print(dbgs(), tri_); }); SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; @@ -224,9 +224,9 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, IntB.MergeValueNumberInto(BValNo, ValLR->valno); } DEBUG({ - errs() << " result = "; - IntB.print(errs(), tri_); - errs() << "\n"; + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << "\n"; }); // If the source instruction was killing the source register before the @@ -467,8 +467,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. DEBUG({ - errs() << "\nExtending: "; - IntB.print(errs(), tri_); + dbgs() << "\nExtending: "; + IntB.print(dbgs(), tri_); }); // Remove val#'s defined by copies that will be coalesced away. @@ -518,19 +518,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, ValNo->setHasPHIKill(BHasPHIKill); DEBUG({ - errs() << " result = "; - IntB.print(errs(), tri_); - errs() << '\n'; - errs() << "\nShortening: "; - IntA.print(errs(), tri_); + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << '\n'; + dbgs() << "\nShortening: "; + IntA.print(dbgs(), tri_); }); IntA.removeValNo(AValNo); DEBUG({ - errs() << " result = "; - IntA.print(errs(), tri_); - errs() << '\n'; + dbgs() << " result = "; + IntA.print(dbgs(), tri_); + dbgs() << '\n'; }); ++numCommutes; @@ -1223,16 +1223,16 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { DEBUG({ - errs() << "Interfere with register "; - li_->getInterval(RealDstReg).print(errs(), tri_); + dbgs() << "Interfere with register "; + li_->getInterval(RealDstReg).print(dbgs(), tri_); }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable } @@ -1254,16 +1254,16 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealSrcReg) && RHS.overlaps(li_->getInterval(RealSrcReg))) { DEBUG({ - errs() << "Interfere with register "; - li_->getInterval(RealSrcReg).print(errs(), tri_); + dbgs() << "Interfere with register "; + li_->getInterval(RealSrcReg).print(dbgs(), tri_); }); return false; // Not coalescable } 
for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable } @@ -1293,7 +1293,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) return false; // Already done. - DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; @@ -1313,7 +1313,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (SrcSubIdx && SrcSubIdx != DstSubIdx) { // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. - DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already " + DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already " "coalesced to another register.\n"); return false; // Not coalescable. } @@ -1329,7 +1329,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If they are already joined we continue. if (SrcReg == DstReg) { - DEBUG(errs() << "\tCopy already coalesced.\n"); + DEBUG(dbgs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } @@ -1338,17 +1338,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If they are both physical registers, we cannot join them. if (SrcIsPhys && DstIsPhys) { - DEBUG(errs() << "\tCan not coalesce physregs.\n"); + DEBUG(dbgs() << "\tCan not coalesce physregs.\n"); return false; // Not coalescable. } // We only join virtual registers with allocatable physical registers. if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n"); + DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n"); return false; // Not coalescable. } if (DstIsPhys && !allocatableRegs_[DstReg]) { - DEBUG(errs() << "\tDst reg is unallocatable physreg.\n"); + DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n"); return false; // Not coalescable. } @@ -1362,7 +1362,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); assert(DstSubRC && "Illegal subregister index"); if (!DstSubRC->contains(SrcSubReg)) { - DEBUG(errs() << "\tIncompatible destination regclass: " + DEBUG(dbgs() << "\tIncompatible destination regclass: " << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName() << ".\n"); return false; // Not coalescable. @@ -1379,7 +1379,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); assert(SrcSubRC && "Illegal subregister index"); if (!SrcSubRC->contains(DstSubReg)) { - DEBUG(errs() << "\tIncompatible source regclass: " + DEBUG(dbgs() << "\tIncompatible source regclass: " << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName() << ".\n"); (void)DstSubReg; @@ -1405,7 +1405,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. 
if (DstSubIdx != SubIdx) { - DEBUG(errs() << "\t Sub-register indices mismatch.\n"); + DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1418,7 +1418,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (SrcSubIdx != SubIdx) { - DEBUG(errs() << "\t Sub-register indices mismatch.\n"); + DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1427,7 +1427,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg" + DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg" << " of a super-class.\n"); return false; // Not coalescable. } @@ -1451,7 +1451,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // class as the would be resulting register. SubIdx = 0; else { - DEBUG(errs() << "\t Sub-register indices mismatch.\n"); + DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } @@ -1463,7 +1463,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx); } if (!NewRC) { - DEBUG(errs() << "\t Conflicting sub-register indices.\n"); + DEBUG(dbgs() << "\t Conflicting sub-register indices.\n"); return false; // Not coalescable } @@ -1535,7 +1535,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else if (!SrcIsPhys && !DstIsPhys) { NewRC = getCommonSubClass(SrcRC, DstRC); if (!NewRC) { - DEBUG(errs() << "\tDisjoint regclasses: " + DEBUG(dbgs() << "\tDisjoint regclasses: " << SrcRC->getName() << ", " << DstRC->getName() << ".\n"); return false; // Not coalescable. @@ -1551,7 +1551,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DEBUG(errs() << "\tSrc/Dest are different register classes.\n"); + DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1573,9 +1573,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { "Register mapping is horribly broken!"); DEBUG({ - errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_); - errs() << " and "; DstInt.print(errs(), tri_); - errs() << ": "; + dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_); + dbgs() << " and "; DstInt.print(dbgs(), tri_); + dbgs() << ": "; }); // Save a copy of the virtual register live interval. We'll manually @@ -1606,7 +1606,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; - DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. 
return false; } @@ -1614,7 +1614,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; - DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1635,7 +1635,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->use_end()) / Length) < Ratio)) { mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; - DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1654,7 +1654,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DEBUG(errs() << "Not profitable!\n"); + DEBUG(dbgs() << "Not profitable!\n"); return false; } @@ -1676,7 +1676,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } // Otherwise, we are unable to join the intervals. - DEBUG(errs() << "Interference!\n"); + DEBUG(dbgs() << "Interference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1779,9 +1779,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } DEBUG({ - errs() << "\n\t\tJoined. Result = "; - ResDstInt->print(errs(), tri_); - errs() << "\n"; + dbgs() << "\n\t\tJoined. Result = "; + ResDstInt->print(dbgs(), tri_); + dbgs() << "\n"; }); ++numJoins; @@ -2134,8 +2134,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; } @@ -2151,8 +2151,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - errs() << "Interfere with sub-register "; - li_->getInterval(*SR).print(errs(), tri_); + dbgs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); }); return false; } @@ -2413,7 +2413,7 @@ namespace { void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector<CopyRec> &TryAgain) { - DEBUG(errs() << MBB->getName() << ":\n"); + DEBUG(dbgs() << MBB->getName() << ":\n"); std::vector<CopyRec> VirtCopies; std::vector<CopyRec> PhysCopies; @@ -2478,7 +2478,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, } void SimpleRegisterCoalescing::joinIntervals() { - DEBUG(errs() << "********** JOINING INTERVALS ***********\n"); + DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); std::vector<CopyRec> TryAgainList; if (loopInfo->empty()) { @@ -2610,12 +2610,11 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, return NULL; } - void SimpleRegisterCoalescing::printRegName(unsigned reg) const { if (TargetRegisterInfo::isPhysicalRegister(reg)) - errs() << 
tri_->getName(reg); + dbgs() << tri_->getName(reg); else - errs() << "%reg" << reg; + dbgs() << "%reg" << reg; } void SimpleRegisterCoalescing::releaseMemory() { @@ -2634,7 +2633,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { AA = &getAnalysis<AliasAnalysis>(); loopInfo = &getAnalysis<MachineLoopInfo>(); - DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n" + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); @@ -2648,11 +2647,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (EnableJoining) { joinIntervals(); DEBUG({ - errs() << "********** INTERVALS POST JOINING **********\n"; + dbgs() << "********** INTERVALS POST JOINING **********\n"; for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ - I->second->print(errs(), tri_); - errs() << "\n"; + I->second->print(dbgs(), tri_); + dbgs() << "\n"; } }); } diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 6de03e1..9558933 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -381,9 +381,6 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { I->eraseFromParent(); } - - - // The entry block changes to have the eh.sjlj.setjmp, with a conditional // branch to a dispatch block for non-zero returns. If we return normally, // we're not handling an exception and just register the function context @@ -397,13 +394,15 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Insert a load in the Catch block, and a switch on its value. By default, // we go to a block that just does an unwind (which is the correct action // for a standard call). - BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F); + BasicBlock *UnwindBlock = + BasicBlock::Create(F.getContext(), "unwindbb", &F); Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, DispatchBlock); SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock); + SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + DispatchBlock); // Split the entry block to insert the conditional branch for the setjmp. BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), "eh.sjlj.setjmp.cont"); diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 782af12..b8f529b 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -192,18 +192,18 @@ void SlotIndexes::renumberIndexes() { void SlotIndexes::dump() const { for (const IndexListEntry *itr = front(); itr != getTail(); itr = itr->getNext()) { - errs() << itr->getIndex() << " "; + dbgs() << itr->getIndex() << " "; if (itr->getInstr() != 0) { - errs() << *itr->getInstr(); + dbgs() << *itr->getInstr(); } else { - errs() << "\n"; + dbgs() << "\n"; } } for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); itr != mbb2IdxMap.end(); ++itr) { - errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" + dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" << itr->second.first << ", " << itr->second.second << "]\n"; } } @@ -217,7 +217,7 @@ void SlotIndex::print(raw_ostream &os) const { // Dump a SlotIndex to stderr. 
void SlotIndex::dump() const { - print(errs()); - errs() << "\n"; + print(dbgs()); + dbgs() << "\n"; } diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index bec9294..7ba4403 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -67,7 +67,7 @@ protected: /// immediately before each use, and stores after each def. No folding or /// remat is attempted. std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) { - DEBUG(errs() << "Spilling everywhere " << *li << "\n"); + DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && "Attempting to spill already spilled value."); @@ -75,7 +75,7 @@ protected: assert(!li->isStackSlot() && "Trying to spill a stack slot."); - DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n"); + DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); std::vector<LiveInterval*> added; @@ -89,7 +89,7 @@ protected: // Grab the use/def instr. MachineInstr *mi = &*regItr; - DEBUG(errs() << " Processing " << *mi); + DEBUG(dbgs() << " Processing " << *mi); // Step regItr to the next use/def instr. do { @@ -242,7 +242,7 @@ private: std::vector<LiveInterval*> tryVNISplit(LiveInterval *li, SlotIndex *earliestStart) { - DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n"); + DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n"); std::vector<LiveInterval*> added; SmallVector<VNInfo*, 4> vnis; @@ -257,11 +257,11 @@ private: if (vni->isUnused() || vni->kills.empty()) continue; - DEBUG(errs() << " Extracted Val #" << vni->id << " as "); + DEBUG(dbgs() << " Extracted Val #" << vni->id << " as "); LiveInterval *splitInterval = extractVNI(li, vni); if (splitInterval != 0) { - DEBUG(errs() << *splitInterval << "\n"); + DEBUG(dbgs() << *splitInterval << "\n"); added.push_back(splitInterval); alreadySplit.insert(splitInterval); if (earliestStart != 0) { @@ -269,11 +269,11 @@ private: *earliestStart = splitInterval->beginIndex(); } } else { - DEBUG(errs() << "0\n"); + DEBUG(dbgs() << "0\n"); } } - DEBUG(errs() << "Original LI: " << *li << "\n"); + DEBUG(dbgs() << "Original LI: " << *li << "\n"); // If there original interval still contains some live ranges // add it to added and alreadySplit. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index e8ee822..48bb5af 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const { if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { // We apparently only care about character arrays. - if (AT->getElementType() != Type::getInt8Ty(AT->getContext())) + if (!AT->getElementType()->isInteger(8)) continue; // If an array has more than SSPBufferSize bytes of allocated space, diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index fd25a37..2170703 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -200,7 +200,7 @@ void StackSlotColoring::InitializeSlots() { Assignments.resize(LastFI); // Gather all spill slots into a list. - DEBUG(errs() << "Spill slot intervals:\n"); + DEBUG(dbgs() << "Spill slot intervals:\n"); for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { LiveInterval &li = i->second; DEBUG(li.dump()); @@ -212,7 +212,7 @@ void StackSlotColoring::InitializeSlots() { OrigSizes[FI] = MFI->getObjectSize(FI); AllColors.set(FI); } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); // Sort them by weight. 
std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); @@ -244,7 +244,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, return false; bool Changed = false; - DEBUG(errs() << "Assigning unused registers to spill slots:\n"); + DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -274,7 +274,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, AllColored = false; continue; } else { - DEBUG(errs() << "Assigning fi#" << RSS << " to " + DEBUG(dbgs() << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n'); ColoredRegs.push_back(Reg); SlotMapping[RSS] = Reg; @@ -302,7 +302,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, ++NumEliminated; } } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); return Changed; } @@ -337,7 +337,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); int FI = li->getStackSlotIndex(); - DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); + DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. If there are multiple // objects sharing the same slot, then make sure the size and alignment @@ -361,7 +361,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); - DEBUG(errs() << "Color spill slot intervals:\n"); + DEBUG(dbgs() << "Color spill slot intervals:\n"); bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; @@ -375,7 +375,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { Changed |= (SS != NewSS); } - DEBUG(errs() << "\nSpill slots after coloring:\n"); + DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -387,7 +387,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { #ifndef NDEBUG for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) DEBUG(SSIntervals[i]->dump()); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); #endif // Can we "color" a stack slot with a unused register? @@ -419,7 +419,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { // Delete unused stack slots. 
while (NextColor != -1) { - DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n"); + DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); MFI->RemoveStackObject(NextColor); NextColor = AllColors.find_next(NextColor); } @@ -605,7 +605,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, MachineBasicBlock *MBB = MI->getParent(); if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { if (PropagateForward(MI, MBB, DstReg, Reg)) { - DEBUG(errs() << "Eliminated load: "); + DEBUG(dbgs() << "Eliminated load: "); DEBUG(MI->dump()); ++NumLoadElim; } else { @@ -621,7 +621,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, } } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DEBUG(errs() << "Eliminated store: "); + DEBUG(dbgs() << "Eliminated store: "); DEBUG(MI->dump()); ++NumStoreElim; } else { @@ -699,7 +699,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "********** Stack Slot Coloring **********\n"); + DEBUG(dbgs() << "********** Stack Slot Coloring **********\n"); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 3c13906..bd7cb75 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -555,7 +555,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // Add the renaming set for this PHI node to our overall renaming information for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(), QE = PHIUnion.end(); QI != QE; ++QI) { - DEBUG(errs() << "Adding Renaming: " << QI->first << " -> " + DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> " << P->getOperand(0).getReg() << "\n"); } @@ -698,7 +698,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*PI->getParent(), PI, t, curr.second, RC, RC); - DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t + DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t << "\n"); // Push temporary on Stacks @@ -715,7 +715,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, map[curr.first], RC, RC); map[curr.first] = curr.second; - DEBUG(errs() << "Inserted copy from " << curr.first << " to " + DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " << curr.second << "\n"); // Push this copy onto InsertedPHICopies so we can @@ -928,7 +928,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { unsigned reg = OI->first; ++OI; I->second.erase(reg); - DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first + DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first << "\n"); } } @@ -946,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { while (I->second.size()) { std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); - DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); + DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); if (SI->first != I->first) { if (mergeLiveIntervals(I->first, SI->first)) { @@ -978,7 +978,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { 
R.valno->setCopy(--SI->second->getFirstTerminator()); R.valno->def = instrIdx.getDefIndex(); - DEBUG(errs() << "Renaming failed: " << SI->first << " -> " + DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> " << I->first << "\n"); } } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index bf58902..f51f74d 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -139,8 +139,8 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } if (!Found) { - errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - errs() << " missing input from predecessor BB#" + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " missing input from predecessor BB#" << PredBB->getNumber() << '\n'; llvm_unreachable(0); } @@ -150,14 +150,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); if (CheckExtra && !Preds.count(PHIBB)) { // This is not a hard error. - errs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - errs() << " extra input from predecessor BB#" + dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() << '\n'; } if (PHIBB->getNumber() < 0) { - errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - errs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; llvm_unreachable(0); } } @@ -173,7 +173,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; if (PreRegAlloc && TailDupVerify) { - DEBUG(errs() << "\n*** Before tail-duplicating\n"); + DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); VerifyPHIs(MF, true); } @@ -253,7 +253,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { SSAUpdateVals.clear(); } - // Eliminate some of the copies inserted tail duplication to maintain + // Eliminate some of the copies inserted by tail duplication to maintain // SSA form. for (unsigned i = 0, e = Copies.size(); i != e; ++i) { MachineInstr *Copy = Copies[i]; @@ -346,7 +346,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *PredBB, MachineFunction &MF, DenseMap<unsigned, unsigned> &LocalVRMap) { - MachineInstr *NewMI = MF.CloneMachineInstr(MI); + MachineInstr *NewMI = TII->duplicate(MI, MF); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); if (!MO.isReg()) @@ -437,8 +437,11 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { - // Don't try to tail-duplicate single-block loops. - if (TailBB->isSuccessor(TailBB)) + // Pre-regalloc tail duplication hurts compile time and doesn't help + // much except for indirect branches. + bool hasIndirectBranch = (!TailBB->empty() && + TailBB->back().getDesc().isIndirectBranch()); + if (PreRegAlloc && !hasIndirectBranch) return false; // Set the limit on the number of instructions to duplicate, with a default @@ -446,7 +449,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. 
unsigned MaxDuplicateCount; - if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch()) + if (hasIndirectBranch) // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there // are common paths through the code. The limit needs to be high enough @@ -457,6 +460,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, else MaxDuplicateCount = TailDuplicateSize; + // Don't try to tail-duplicate single-block loops. + if (TailBB->isSuccessor(TailBB)) + return false; + // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; @@ -481,7 +488,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (InstrCount > 1 && HasCall) return false; - DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also @@ -510,7 +517,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) continue; - DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); TDBBs.push_back(PredBB); @@ -570,7 +577,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && !TailBB->hasAddressTaken()) { - DEBUG(errs() << "\nMerging into block: " << *PrevBB + DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap<unsigned, unsigned> LocalVRMap; @@ -620,7 +627,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, /// function, updating the CFG. void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); - DEBUG(errs() << "\nRemoving MBB: " << *MBB); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); // Remove all successors. 
while (!MBB->succ_empty()) diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 393e315..a0fccab 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -150,6 +150,13 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MBB.insert(I, MI); } +MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, + MachineFunction &MF) const { + assert(!Orig->getDesc().isNotDuplicable() && + "Instruction cannot be duplicated"); + return MF.CloneMachineInstr(Orig); +} + bool TargetInstrInfoImpl::isIdentical(const MachineInstr *MI, const MachineInstr *Other, diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 98b95ac..a3f6364 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -573,15 +573,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; - DEBUG(errs() << "2addr: COMMUTING : " << *MI); + DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI == 0) { - DEBUG(errs() << "2addr: COMMUTING FAILED!\n"); + DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); return false; } - DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI); + DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); // If the instruction changed to commute it, update livevar. if (NewMI != MI) { if (LV) @@ -628,8 +628,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, unsigned RegB, unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { - DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi); - DEBUG(errs() << "2addr: TO 3-ADDR: " << *NewMI); + DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; if (NewMI->findRegisterUseOperand(RegB, false, TRI)) @@ -891,7 +891,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(errs() << "Machine Function\n"); + DEBUG(dbgs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); @@ -901,8 +901,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; - DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); - DEBUG(errs() << "********** Function: " + DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. 
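The TargetInstrInfoImpl.cpp hunk above adds a duplicate() hook that asserts the instruction is not marked isNotDuplicable before cloning, and the TailDuplication change routes cloning through it (TII->duplicate(MI, MF) instead of MF.CloneMachineInstr(MI)), giving targets a single override point. A self-contained sketch of the pattern, using invented stand-in types rather than LLVM's:

#include <cassert>
#include <cstdio>

// Simplified model of a machine instruction.
struct Instr {
  int Opcode;
  bool NotDuplicable;
};

struct TargetInfoBase {
  virtual ~TargetInfoBase() {}
  // Default behavior mirrors the new TargetInstrInfoImpl::duplicate():
  // refuse (via assert) instructions flagged as not duplicable, then clone.
  virtual Instr duplicate(const Instr &Orig) const {
    assert(!Orig.NotDuplicable && "Instruction cannot be duplicated");
    return Orig; // a plain clone
  }
};

// A hypothetical target override; FooTargetInfo is an invented name.
struct FooTargetInfo : TargetInfoBase {
  Instr duplicate(const Instr &Orig) const override {
    Instr Copy = TargetInfoBase::duplicate(Orig);
    // A real target could fix up the clone here (e.g. hand it a fresh
    // PIC-base register) before returning it to tail duplication.
    return Copy;
  }
};

int main() {
  FooTargetInfo TII;
  Instr I = {42, false};
  Instr Copy = TII.duplicate(I); // what TailDuplicatePass now calls
  printf("duplicated opcode %d\n", Copy.Opcode);
  return 0;
}

The point of the indirection is that every caller keeps the no-duplication assertion while targets get one place to customize how a clone is produced.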
@@ -943,7 +943,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (FirstTied) { FirstTied = false; ++NumTwoAddressInstrs; - DEBUG(errs() << '\t' << *mi); + DEBUG(dbgs() << '\t' << *mi); } assert(mi->getOperand(SrcIdx).isReg() && @@ -1024,7 +1024,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DefMI->getDesc().isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, regB, AA) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); + DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); ReMatRegs.set(regB); @@ -1040,7 +1040,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DistanceMap.insert(std::make_pair(prevMI, Dist)); DistanceMap[mi] = ++Dist; - DEBUG(errs() << "\t\tprepend:\t" << *prevMI); + DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); MachineOperand &MO = mi->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && @@ -1085,7 +1085,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; - DEBUG(errs() << "\t\trewrite to:\t" << *mi); + DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } // Clear TiedOperands here instead of at the top of the loop diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index c8c5d86..d4fb2e4 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -278,5 +278,5 @@ void VirtRegMap::print(raw_ostream &OS, const Module* M) const { } void VirtRegMap::dump() const { - print(errs()); + print(dbgs()); } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 054c3b6..df2b8d2 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -60,6 +60,33 @@ ScheduleSpills("schedule-spills", VirtRegRewriter::~VirtRegRewriter() {} +/// substitutePhysReg - Replace virtual register in MachineOperand with a +/// physical register. Do the right thing with the sub-register index. +static void substitutePhysReg(MachineOperand &MO, unsigned Reg, + const TargetRegisterInfo &TRI) { + if (unsigned SubIdx = MO.getSubReg()) { + // Insert the physical subreg and reset the subreg field. + MO.setReg(TRI.getSubReg(Reg, SubIdx)); + MO.setSubReg(0); + + // Any def, dead, and kill flags apply to the full virtual register, so they + // also apply to the full physical register. Add imp-def/dead and imp-kill + // as needed. + MachineInstr &MI = *MO.getParent(); + if (MO.isDef()) + if (MO.isDead()) + MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true); + else + MI.addRegisterDefined(Reg, &TRI); + else if (!MO.isUndef() && + (MO.isKill() || + MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) + MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); + } else { + MO.setReg(Reg); + } +} + namespace { /// This class is intended for use with the new spilling framework only. It @@ -69,10 +96,10 @@ struct TrivialRewriter : public VirtRegRewriter { bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, LiveIntervals* LIs) { - DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n"); - DEBUG(errs() << "********** Function: " + DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n"); + DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); - DEBUG(errs() << "**** Machine Instrs" + DEBUG(dbgs() << "**** Machine Instrs" << "(NOTE! 
Does not include spills and reloads!) ****\n"); DEBUG(MF.dump()); @@ -101,16 +128,13 @@ struct TrivialRewriter : public VirtRegRewriter { MachineOperand &mop = regItr.getOperand(); assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?"); ++regItr; - unsigned subRegIdx = mop.getSubReg(); - unsigned pRegOp = subRegIdx ? tri->getSubReg(pReg, subRegIdx) : pReg; - mop.setReg(pRegOp); - mop.setSubReg(0); + substitutePhysReg(mop, pReg, *tri); changed = true; } } } - DEBUG(errs() << "**** Post Machine Instrs ****\n"); + DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); return changed; @@ -191,11 +215,11 @@ public: (unsigned)CanClobber; if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "Remembering RM#" + DEBUG(dbgs() << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Remembering SS#" << SlotOrReMat); - DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n"); + DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat); + DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n"); } /// canClobberPhysRegForSS - Return true if the spiller is allowed to change @@ -647,12 +671,9 @@ static void ReMaterialize(MachineBasicBlock &MBB, if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) continue; assert(MO.isUse()); - unsigned SubIdx = MO.getSubReg(); unsigned Phys = VRM.getPhys(VirtReg); assert(Phys && "Virtual register is not assigned a register?"); - unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys; - MO.setReg(RReg); - MO.setSubReg(0); + substitutePhysReg(MO, Phys, *TRI); } ++NumReMats; } @@ -686,7 +707,7 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) << " copied, it is available for use but can no longer be modified\n"); } } @@ -711,12 +732,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) << " clobbered, invalidating "); if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); + DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); else - DEBUG(errs() << "SS#" << SlotOrReMat << "\n"); + DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n"); } } @@ -895,9 +916,9 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *prior(InsertLoc)); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - DEBUG(errs() << "Reuse undone!\n"); + DEBUG(dbgs() << "Reuse undone!\n"); --NumReused; // Finally, PhysReg is now available, go ahead and use it. 
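The substitutePhysReg helper introduced above replaces the old two-line setReg/setSubReg pattern at each call site. The subtle part is the sub-register case: the operand must be narrowed to the matching physical sub-register, the sub-register index cleared, and any kill/dead flags promoted to the full register. A standalone toy of that logic follows; the register numbers and the lookup table stand in for TargetRegisterInfo::getSubReg and are invented for illustration.

#include <cassert>
#include <map>
#include <utility>

using Reg = unsigned;
using SubIdx = unsigned;

// (physical register, sub-register index) -> physical sub-register,
// a stand-in for TargetRegisterInfo::getSubReg.
static const std::map<std::pair<Reg, SubIdx>, Reg> SubRegTable = {
  {{/*EAX*/1, /*sub_16bit*/1}, /*AX*/2},
  {{/*EAX*/1, /*sub_8bit*/2}, /*AL*/3},
};

struct Operand {
  Reg R;
  SubIdx Sub;    // 0 means "the whole register"
  bool Kill;     // last use of the value
  bool ImpKill;  // implicit kill of the full register, added by us
};

// Replace a virtual register with a physical one. With a sub-register index
// we must narrow to the matching physical sub-register and reset the index;
// a kill of the sub-register then implies a kill of the full register.
void substitute(Operand &MO, Reg Phys) {
  if (MO.Sub) {
    auto It = SubRegTable.find({Phys, MO.Sub});
    assert(It != SubRegTable.end() && "No such sub-register");
    MO.R = It->second;
    MO.Sub = 0;
    if (MO.Kill)
      MO.ImpKill = true; // flags apply to the full physical register
  } else {
    MO.R = Phys;
  }
}

int main() {
  Operand MO{/*vreg*/100, /*sub_16bit*/1, /*Kill=*/true, false};
  substitute(MO, /*EAX*/1);
  return (MO.R == 2 && MO.Sub == 0 && MO.ImpKill) ? 0 : 1;
}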
@@ -1004,11 +1025,12 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, } static -void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) { +void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &TRI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.getReg() == VirtReg) - MO.setReg(PhysReg); + substitutePhysReg(MO, PhysReg, TRI); } } @@ -1041,9 +1063,9 @@ public: TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); AllocatableRegs = TRI->getAllocatableSet(MF); - DEBUG(errs() << "\n**** Local spiller rewriting function '" + DEBUG(dbgs() << "\n**** Local spiller rewriting function '" << MF.getFunction()->getName() << "':\n"); - DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and" + DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" " reloads!) ****\n"); DEBUG(MF.dump()); @@ -1095,7 +1117,7 @@ public: Spills.clear(); } - DEBUG(errs() << "**** Post Machine Instrs ****\n"); + DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); // Mark unused spill slots. @@ -1175,7 +1197,7 @@ private: if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); + AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); VRM.transferRestorePts(&MI, NewMIs[0]); MII = MBB.insert(MII, NewMIs[0]); InvalidateKills(MI, TRI, RegKills, KillOps); @@ -1191,7 +1213,7 @@ private: if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); + AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); VRM.transferRestorePts(&NextMI, NewMIs[0]); MBB.insert(NextMII, NewMIs[0]); InvalidateKills(NextMI, TRI, RegKills, KillOps); @@ -1467,11 +1489,11 @@ private: TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DEBUG(errs() << "Store:\t" << *StoreMI); + DEBUG(dbgs() << "Store:\t" << *StoreMI); // If there is a dead store to this stack slot, nuke it now. if (LastStore) { - DEBUG(errs() << "Removed dead store:\t" << *LastStore); + DEBUG(dbgs() << "Removed dead store:\t" << *LastStore); ++NumDSE; SmallVector<unsigned, 2> KillRegs; InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); @@ -1599,7 +1621,7 @@ private: AvailableSpills &Spills, BitVector &RegKills, std::vector<MachineOperand*> &KillOps) { - DEBUG(errs() << "\n**** Local spiller rewriting MBB '" + DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '" << MBB.getName() << "':\n"); MachineFunction &MF = *MBB.getParent(); @@ -1699,11 +1721,11 @@ private: // If the value is already available in the expected register, save // a reload / remat. 
if (SSorRMId) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << SSorRMId); - DEBUG(errs() << " from physreg " + DEBUG(dbgs() << "Reusing SS#" << SSorRMId); + DEBUG(dbgs() << " from physreg " << TRI->getName(InReg) << " for vreg" << VirtReg <<" instead of reloading into physreg " << TRI->getName(Phys) << '\n'); @@ -1711,11 +1733,11 @@ private: continue; } else if (InReg && InReg != Phys) { if (SSorRMId) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << SSorRMId); - DEBUG(errs() << " from physreg " + DEBUG(dbgs() << "Reusing SS#" << SSorRMId); + DEBUG(dbgs() << " from physreg " << TRI->getName(InReg) << " for vreg" << VirtReg <<" by copying it into physreg " << TRI->getName(Phys) << '\n'); @@ -1742,7 +1764,7 @@ private: KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *CopyMI); + DEBUG(dbgs() << '\t' << *CopyMI); ++NumCopified; continue; } @@ -1769,7 +1791,7 @@ private: Spills.addAvailable(SSorRMId, Phys); UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *prior(MII)); + DEBUG(dbgs() << '\t' << *prior(MII)); } } @@ -1789,7 +1811,7 @@ private: TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DEBUG(errs() << "Store:\t" << *StoreMI); + DEBUG(dbgs() << "Store:\t" << *StoreMI); VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); } NextMII = llvm::next(MII); @@ -1840,16 +1862,14 @@ private: RegInfo->setPhysRegUsed(Phys); if (MO.isDef()) ReusedOperands.markClobbered(Phys); - unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); + substitutePhysReg(MO, Phys, *TRI); if (VRM.isImplicitlyDefined(VirtReg)) // FIXME: Is this needed? BuildMI(MBB, &MI, MI.getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg); + TII->get(TargetInstrInfo::IMPLICIT_DEF), Phys); continue; } - + // This virtual register is now known to be a spilled value. if (!MO.isUse()) continue; // Handle defs in the loop below (handle use&def here though) @@ -1908,11 +1928,11 @@ private: if (CanReuse) { // If this stack slot value is already available, reuse it! if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << ReuseSlot); - DEBUG(errs() << " from physreg " + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) << " for vreg" << VirtReg <<" instead of reloading into physreg " << TRI->getName(VRM.getPhys(VirtReg)) << '\n'); @@ -1991,11 +2011,11 @@ private: if (DesignatedReg == PhysReg) { // If this stack slot value is already available, reuse it! if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(errs() << "Reusing RM#" + DEBUG(dbgs() << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DEBUG(errs() << "Reusing SS#" << ReuseSlot); - DEBUG(errs() << " from physreg " << TRI->getName(PhysReg) + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) << " for vreg" << VirtReg << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; @@ -2029,7 +2049,7 @@ private: SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); - DEBUG(errs() << '\t' << *prior(MII)); + DEBUG(dbgs() << '\t' << *prior(MII)); ++NumReused; continue; } // if (PhysReg) @@ -2082,7 +2102,7 @@ private: } UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(errs() << '\t' << *prior(InsertLoc)); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); } unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -2096,7 +2116,7 @@ private: int PDSSlot = PotentialDeadStoreSlots[j]; MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; if (DeadStore) { - DEBUG(errs() << "Removed dead store:\t" << *DeadStore); + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -2106,7 +2126,7 @@ private: } - DEBUG(errs() << '\t' << MI); + DEBUG(dbgs() << '\t' << MI); // If we have folded references to memory operands, make sure we clear all @@ -2116,7 +2136,7 @@ private: for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) { unsigned VirtReg = I->second.first; VirtRegMap::ModRef MR = I->second.second; - DEBUG(errs() << "Folded vreg: " << VirtReg << " MR: " << MR); + DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR); // MI2VirtMap be can updated which invalidate the iterator. // Increment the iterator first. @@ -2125,7 +2145,7 @@ private: if (SS == VirtRegMap::NO_STACK_SLOT) continue; FoldedSS.insert(SS); - DEBUG(errs() << " - StackSlot: " << SS << "\n"); + DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); // If this folded instruction is just a use, check to see if it's a // straight load from the virt reg slot. @@ -2136,7 +2156,7 @@ private: // If this spill slot is available, turn it into a copy (or nothing) // instead of leaving it as a load! if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DEBUG(errs() << "Promoted Load To Copy: " << MI); + DEBUG(dbgs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC); @@ -2160,7 +2180,7 @@ private: BackTracked = true; } else { - DEBUG(errs() << "Removing now-noop copy: " << MI); + DEBUG(dbgs() << "Removing now-noop copy: " << MI); // Unset last kill since it's being reused. InvalidateKill(InReg, TRI, RegKills, KillOps); Spills.disallowClobberPhysReg(InReg); @@ -2230,7 +2250,7 @@ private: if (isDead) { // Previous store is dead. // If we get here, the store is dead, nuke it now. 
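The "Removed dead store" hunks around this point all come from one bookkeeping idea in the local spiller: remember the last store to each stack slot, and if the slot is stored to again before anything reloads from it, the earlier store was dead and can be deleted. A minimal sketch of that tracking is below; the slot and instruction ids are invented, and this models only the idea, not the pass's actual data structures.

#include <map>
#include <set>

// Toy dead-spill-store tracker in the spirit of MaybeDeadStores.
struct SpillDSE {
  std::map<int, int> LastStore; // slot -> instruction id of last store
  std::set<int> DeadStores;     // instructions proven removable

  void store(int Slot, int Inst) {
    auto It = LastStore.find(Slot);
    if (It != LastStore.end())
      DeadStores.insert(It->second); // overwritten, no intervening read
    LastStore[Slot] = Inst;
  }
  void reload(int Slot) {
    LastStore.erase(Slot); // the pending store was read, so it must stay
  }
};

int main() {
  SpillDSE S;
  S.store(/*Slot=*/0, /*Inst=*/1);
  S.store(0, 2);  // store #1 is now dead
  S.reload(0);
  S.store(0, 3);  // store #2 was reloaded first, so it stays
  return (S.DeadStores.count(1) == 1 && S.DeadStores.count(2) == 0) ? 0 : 1;
}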
- DEBUG(errs() << "Removed dead store:\t" << *DeadStore); + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -2301,7 +2321,7 @@ private: if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && !MI.findRegisterUseOperand(Src)->isUndef()) { ++NumDCE; - DEBUG(errs() << "Removing now-noop copy: " << MI); + DEBUG(dbgs() << "Removing now-noop copy: " << MI); SmallVector<unsigned, 2> KillRegs; InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); if (MO.isDead() && !KillRegs.empty()) { @@ -2389,7 +2409,7 @@ private: unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { ++NumDCE; - DEBUG(errs() << "Removing now-noop copy: " << MI); + DEBUG(dbgs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(&MI); MBB.erase(&MI); diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index cb30748..89c4290 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -138,7 +138,7 @@ void *ExecutionEngineState::RemoveMapping( void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { MutexGuard locked(lock); - DEBUG(errs() << "JIT: Map \'" << GV->getName() + DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); void *&CurVal = EEState.getGlobalAddressMap(locked)[GV]; assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!"); @@ -246,13 +246,13 @@ static void *CreateArgv(LLVMContext &C, ExecutionEngine *EE, unsigned PtrSize = EE->getTargetData()->getPointerSize(); char *Result = new char[(InputArgv.size()+1)*PtrSize]; - DEBUG(errs() << "JIT: ARGV = " << (void*)Result << "\n"); + DEBUG(dbgs() << "JIT: ARGV = " << (void*)Result << "\n"); const Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; char *Dest = new char[Size]; - DEBUG(errs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); + DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest); Dest[Size-1] = 0; @@ -343,9 +343,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, // Check main() type unsigned NumArgs = Fn->getFunctionType()->getNumParams(); const FunctionType *FTy = Fn->getFunctionType(); - const Type* PPInt8Ty = - PointerType::getUnqual(PointerType::getUnqual( - Type::getInt8Ty(Fn->getContext()))); + const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo(); switch (NumArgs) { case 3: if (FTy->getParamType(2) != PPInt8Ty) { @@ -358,13 +356,13 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, } // FALLS THROUGH case 1: - if (FTy->getParamType(0) != Type::getInt32Ty(Fn->getContext())) { + if (!FTy->getParamType(0)->isInteger(32)) { llvm_report_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH case 0: if (!isa<IntegerType>(FTy->getReturnType()) && - FTy->getReturnType() != Type::getVoidTy(FTy->getContext())) { + !FTy->getReturnType()->isVoidTy()) { llvm_report_error("Invalid return type of main() supplied"); } break; @@ -493,8 +491,22 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { /// @brief Get a GenericValue for a Constant* GenericValue ExecutionEngine::getConstantValue(const Constant *C) { // If its undefined, return the garbage. 
- if (isa<UndefValue>(C)) - return GenericValue(); + if (isa<UndefValue>(C)) { + GenericValue Result; + switch (C->getType()->getTypeID()) { + case Type::IntegerTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + // Although the value is undefined, we still have to construct an APInt + // with the correct bit width. + Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0); + break; + default: + break; + } + return Result; + } // If the value is a ConstantExpr if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -620,13 +632,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: - assert(DestTy == Type::getInt32Ty(DestTy->getContext()) && - "Invalid bitcast"); + assert(DestTy->isInteger(32) && "Invalid bitcast"); GV.IntVal.floatToBits(GV.FloatVal); break; case Type::DoubleTyID: - assert(DestTy == Type::getInt64Ty(DestTy->getContext()) && - "Invalid bitcast"); + assert(DestTy->isInteger(64) && "Invalid bitcast"); GV.IntVal.doubleToBits(GV.DoubleVal); break; case Type::PointerTyID: @@ -832,7 +842,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, *((PointerTy*)Ptr) = Val.PointerVal; break; default: - errs() << "Cannot store value of type " << *Ty << "!\n"; + dbgs() << "Cannot store value of type " << *Ty << "!\n"; } if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) @@ -908,7 +918,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, // specified memory location... // void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { - DEBUG(errs() << "JIT: Initializing " << Addr << " "); + DEBUG(dbgs() << "JIT: Initializing " << Addr << " "); DEBUG(Init->dump()); if (isa<UndefValue>(Init)) { return; @@ -939,7 +949,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { return; } - errs() << "Bad Type: " << *Init->getType() << "\n"; + dbgs() << "Bad Type: " << *Init->getType() << "\n"; llvm_unreachable("Unknown constant type to initialize memory with!"); } diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 5901cd7..412b493 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -24,7 +24,7 @@ using namespace llvm; LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, unsigned long long N, - int IsSigned) { + LLVMBool IsSigned) { GenericValue *GenVal = new GenericValue(); GenVal->IntVal = APInt(unwrap<IntegerType>(Ty)->getBitWidth(), N, IsSigned); return wrap(GenVal); @@ -56,7 +56,7 @@ unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) { } unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef, - int IsSigned) { + LLVMBool IsSigned) { GenericValue *GenVal = unwrap(GenValRef); if (IsSigned) return GenVal->IntVal.getSExtValue(); @@ -87,9 +87,9 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) { /*===-- Operations on execution engines -----------------------------------===*/ -int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + LLVMModuleProviderRef MP, + char **OutError) { std::string Error; EngineBuilder builder(unwrap(MP)); builder.setEngineKind(EngineKind::Either) @@ -102,9 +102,9 @@ int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, return 1; } -int 
LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError) { std::string Error; EngineBuilder builder(unwrap(MP)); builder.setEngineKind(EngineKind::Interpreter) @@ -117,10 +117,10 @@ int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, return 1; } -int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char **OutError) { +LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError) { std::string Error; EngineBuilder builder(unwrap(MP)); builder.setEngineKind(EngineKind::JIT) @@ -177,9 +177,9 @@ void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ unwrap(EE)->addModuleProvider(unwrap(MP)); } -int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, - LLVMModuleProviderRef MP, - LLVMModuleRef *OutMod, char **OutError) { +LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, + LLVMModuleProviderRef MP, + LLVMModuleRef *OutMod, char **OutError) { std::string Error; if (Module *Gone = unwrap(EE)->removeModuleProvider(unwrap(MP), &Error)) { *OutMod = wrap(Gone); @@ -190,8 +190,8 @@ int LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, return 1; } -int LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, - LLVMValueRef *OutFn) { +LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, + LLVMValueRef *OutFn) { if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) { *OutFn = wrap(F); return 0; diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index b59cfd1..73f5558 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -56,7 +56,7 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(+, Float); IMPLEMENT_BINARY_OPERATOR(+, Double); default: - errs() << "Unhandled type for FAdd instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -67,7 +67,7 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(-, Float); IMPLEMENT_BINARY_OPERATOR(-, Double); default: - errs() << "Unhandled type for FSub instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -78,7 +78,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(*, Float); IMPLEMENT_BINARY_OPERATOR(*, Double); default: - errs() << "Unhandled type for FMul instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -89,7 +89,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(/, Float); IMPLEMENT_BINARY_OPERATOR(/, Double); default: - errs() << "Unhandled type for FDiv instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -104,7 +104,7 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal); break; default: - errs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; llvm_unreachable(0); } } @@ -131,7 
+131,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(eq,Ty); IMPLEMENT_POINTER_ICMP(==); default: - errs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -144,7 +144,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ne,Ty); IMPLEMENT_POINTER_ICMP(!=); default: - errs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -157,7 +157,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ult,Ty); IMPLEMENT_POINTER_ICMP(<); default: - errs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -170,7 +170,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(slt,Ty); IMPLEMENT_POINTER_ICMP(<); default: - errs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -183,7 +183,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ugt,Ty); IMPLEMENT_POINTER_ICMP(>); default: - errs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -196,7 +196,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sgt,Ty); IMPLEMENT_POINTER_ICMP(>); default: - errs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -209,7 +209,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ule,Ty); IMPLEMENT_POINTER_ICMP(<=); default: - errs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -222,7 +222,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sle,Ty); IMPLEMENT_POINTER_ICMP(<=); default: - errs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -235,7 +235,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(uge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: - errs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -248,7 +248,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: - errs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; + dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -273,7 +273,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break; case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, 
Src2, Ty); break; default: - errs() << "Don't know how to handle this ICmp predicate!\n-->" << I; + dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I; llvm_unreachable(0); } @@ -292,7 +292,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(==, Float); IMPLEMENT_FCMP(==, Double); default: - errs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -306,7 +306,7 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(!=, Double); default: - errs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -319,7 +319,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(<=, Float); IMPLEMENT_FCMP(<=, Double); default: - errs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -332,7 +332,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(>=, Float); IMPLEMENT_FCMP(>=, Double); default: - errs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -345,7 +345,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(<, Float); IMPLEMENT_FCMP(<, Double); default: - errs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -358,7 +358,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(>, Float); IMPLEMENT_FCMP(>, Double); default: - errs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; + dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; llvm_unreachable(0); } return Dest; @@ -467,7 +467,7 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; default: - errs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; llvm_unreachable(0); } @@ -513,7 +513,7 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, return Result; } default: - errs() << "Unhandled Cmp predicate\n"; + dbgs() << "Unhandled Cmp predicate\n"; llvm_unreachable(0); } } @@ -542,7 +542,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; default: - errs() << "Don't know how to handle this binary operator!\n-->" << I; + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; llvm_unreachable(0); } @@ -602,7 +602,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ExecutionContext &CallingSF = ECStack.back(); if (Instruction *I = CallingSF.Caller.getInstruction()) { // Save result... 
- if (CallingSF.Caller.getType() != Type::getVoidTy(RetTy->getContext())) + if (!CallingSF.Caller.getType()->isVoidTy()) SetValue(I, Result, CallingSF); if (InvokeInst *II = dyn_cast<InvokeInst> (I)) SwitchToNewBasicBlock (II->getNormalDest (), CallingSF); @@ -744,7 +744,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { // Allocate enough memory to hold the type... void *Memory = malloc(MemToAlloc); - DEBUG(errs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " + DEBUG(dbgs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " << NumElements << " (Total: " << MemToAlloc << ") at " << uintptr_t(Memory) << '\n'); @@ -794,7 +794,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, GenericValue Result; Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total; - DEBUG(errs() << "GEP Index " << Total << " bytes.\n"); + DEBUG(dbgs() << "GEP Index " << Total << " bytes.\n"); return Result; } @@ -812,7 +812,7 @@ void Interpreter::visitLoadInst(LoadInst &I) { LoadValueFromMemory(Result, Ptr, I.getType()); SetValue(&I, Result, SF); if (I.isVolatile() && PrintVolatile) - errs() << "Volatile load " << I; + dbgs() << "Volatile load " << I; } void Interpreter::visitStoreInst(StoreInst &I) { @@ -822,7 +822,7 @@ void Interpreter::visitStoreInst(StoreInst &I) { StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC), I.getOperand(0)->getType()); if (I.isVolatile() && PrintVolatile) - errs() << "Volatile store: " << I; + dbgs() << "Volatile store: " << I; } //===----------------------------------------------------------------------===// @@ -1164,7 +1164,7 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { IMPLEMENT_VAARG(Float); IMPLEMENT_VAARG(Double); default: - errs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; + dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; llvm_unreachable(0); } @@ -1251,7 +1251,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue()); break; default: - errs() << "Unhandled ConstantExpr: " << *CE << "\n"; + dbgs() << "Unhandled ConstantExpr: " << *CE << "\n"; llvm_unreachable(0); return GenericValue(); } @@ -1324,24 +1324,24 @@ void Interpreter::run() { // Track the number of dynamic instructions executed. ++NumDynamicInsts; - DEBUG(errs() << "About to interpret: " << I); + DEBUG(dbgs() << "About to interpret: " << I); visit(I); // Dispatch to one of the visit* methods... #if 0 // This is not safe, as visiting the instruction could lower it and free I. 
DEBUG( if (!isa<CallInst>(I) && !isa<InvokeInst>(I) && I.getType() != Type::VoidTy) { - errs() << " --> "; + dbgs() << " --> "; const GenericValue &Val = SF.Values[&I]; switch (I.getType()->getTypeID()) { default: llvm_unreachable("Invalid GenericValue Type"); - case Type::VoidTyID: errs() << "void"; break; - case Type::FloatTyID: errs() << "float " << Val.FloatVal; break; - case Type::DoubleTyID: errs() << "double " << Val.DoubleVal; break; - case Type::PointerTyID: errs() << "void* " << intptr_t(Val.PointerVal); + case Type::VoidTyID: dbgs() << "void"; break; + case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break; + case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break; + case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal); break; case Type::IntegerTyID: - errs() << "i" << Val.IntVal.getBitWidth() << " " + dbgs() << "i" << Val.IntVal.getBitWidth() << " " << Val.IntVal.toStringUnsigned(10) << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; break; diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index ebc2567..faf724f 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -411,11 +411,10 @@ GenericValue JIT::runFunction(Function *F, // Handle some common cases first. These cases correspond to common `main' // prototypes. - if (RetTy == Type::getInt32Ty(F->getContext()) || - RetTy == Type::getVoidTy(F->getContext())) { + if (RetTy->isInteger(32) || RetTy->isVoidTy()) { switch (ArgValues.size()) { case 3: - if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) && + if (FTy->getParamType(0)->isInteger(32) && isa<PointerType>(FTy->getParamType(1)) && isa<PointerType>(FTy->getParamType(2))) { int (*PF)(int, char **, const char **) = @@ -430,7 +429,7 @@ GenericValue JIT::runFunction(Function *F, } break; case 2: - if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) && + if (FTy->getParamType(0)->isInteger(32) && isa<PointerType>(FTy->getParamType(1))) { int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; @@ -443,7 +442,7 @@ GenericValue JIT::runFunction(Function *F, break; case 1: if (FTy->getNumParams() == 1 && - FTy->getParamType(0) == Type::getInt32Ty(F->getContext())) { + FTy->getParamType(0)->isInteger(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); @@ -548,7 +547,7 @@ GenericValue JIT::runFunction(Function *F, "", StubBB); TheCall->setCallingConv(F->getCallingConv()); TheCall->setTailCall(); - if (TheCall->getType() != Type::getVoidTy(F->getContext())) + if (!TheCall->getType()->isVoidTy()) // Return result of the call. ReturnInst::Create(F->getContext(), TheCall, StubBB); else diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index ef323b5..0f604ac 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -377,7 +377,7 @@ namespace { MemMgr = JMM ? 
JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); - DEBUG(errs() << "JIT is managing a GOT\n"); + DEBUG(dbgs() << "JIT is managing a GOT\n"); } if (DwarfExceptionHandling || JITEmitDebugInfo) { @@ -431,7 +431,7 @@ namespace { if (MBBLocations.size() <= (unsigned)MBB->getNumber()) MBBLocations.resize((MBB->getNumber()+1)*2); MBBLocations[MBB->getNumber()] = getCurrentPCValue(); - DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" + DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" << (void*) getCurrentPCValue() << "]\n"); } @@ -547,7 +547,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { TheJIT->updateGlobalMapping(F, Stub); } - DEBUG(errs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" + DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); // Finally, keep track of the stub-to-Function mapping so that the @@ -577,7 +577,7 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress, JE); - DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym + DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym << "] for GV '" << GV->getName() << "'\n"); return IndirectSym; @@ -595,7 +595,7 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) { Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE); JE.finishGVStub(); - DEBUG(errs() << "JIT: Stub emitted at [" << Stub + DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub << "] for external function at '" << FnAddr << "'\n"); return Stub; } @@ -605,7 +605,7 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) { if (!idx) { idx = ++nextGOTIndex; revGOTMap[addr] = idx; - DEBUG(errs() << "JIT: Adding GOT entry " << idx << " for addr [" + DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr [" << addr << "]\n"); } return idx; @@ -701,7 +701,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { + F->getName() + "' when lazy compiles are disabled!"); } - DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName() + DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName() << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); @@ -864,7 +864,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy); size_t GVAlign = (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV); - DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign); + DEBUG(dbgs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign); DEBUG(GV->dump()); // Assume code section ends with worst possible alignment, so first // variable needs maximal padding. @@ -992,7 +992,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } } } - DEBUG(errs() << "JIT: About to look through initializers\n"); + DEBUG(dbgs() << "JIT: About to look through initializers\n"); // Look for more globals that are referenced only from initializers. // GVSet.end is computed each time because the set can grow as we go. 
for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin(); @@ -1006,14 +1006,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } void JITEmitter::startFunction(MachineFunction &F) { - DEBUG(errs() << "JIT: Starting CodeGen of Function " + DEBUG(dbgs() << "JIT: Starting CodeGen of Function " << F.getFunction()->getName() << "\n"); uintptr_t ActualSize = 0; // Set the memory writable, if it's not already MemMgr->setMemoryWritable(); if (MemMgr->NeedsExactSize()) { - DEBUG(errs() << "JIT: ExactSize\n"); + DEBUG(dbgs() << "JIT: ExactSize\n"); const TargetInstrInfo* TII = F.getTarget().getInstrInfo(); MachineJumpTableInfo *MJTI = F.getJumpTableInfo(); MachineConstantPool *MCP = F.getConstantPool(); @@ -1040,12 +1040,12 @@ void JITEmitter::startFunction(MachineFunction &F) { // Add the function size ActualSize += TII->GetFunctionSizeInBytes(F); - DEBUG(errs() << "JIT: ActualSize before globals " << ActualSize << "\n"); + DEBUG(dbgs() << "JIT: ActualSize before globals " << ActualSize << "\n"); // Add the size of the globals that will be allocated after this function. // These are all the ones referenced from this function that were not // previously allocated. ActualSize += GetSizeOfGlobalsInBytes(F); - DEBUG(errs() << "JIT: ActualSize after globals " << ActualSize << "\n"); + DEBUG(dbgs() << "JIT: ActualSize after globals " << ActualSize << "\n"); } else if (SizeEstimate > 0) { // SizeEstimate will be non-zero on reallocation attempts. ActualSize = SizeEstimate; @@ -1104,7 +1104,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (MR.isExternalSymbol()) { ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), false); - DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" + DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" << ResultPtr << "]\n"); // If the target REALLY wants a stub for this function, emit it now. 
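The NeedsExactSize path above has to reserve memory before emitting anything, so it sums the function's estimated code size with the size of every global the function will cause to be allocated, padding pessimistically as if each one needed maximal alignment. A toy version of that accumulation, with the sizes and alignments invented:

#include <cstdint>
#include <vector>

// Toy stand-in for a global awaiting allocation after the function body.
struct Global { uint64_t Size, Align; };

uint64_t alignTo(uint64_t Off, uint64_t Align) {
  return (Off + Align - 1) / Align * Align;
}

// Mirrors the idea of ActualSize += function size, then += each global
// with worst-case padding inserted before it.
uint64_t estimateSize(uint64_t FnSize, const std::vector<Global> &Globals) {
  uint64_t Actual = FnSize;
  for (const Global &G : Globals) {
    // Pessimistic: pad as if the previous byte ended maximally misaligned.
    Actual = alignTo(Actual, G.Align) + G.Size;
  }
  return Actual;
}

int main() {
  // e.g. 100 bytes of code, an 8-byte/8-aligned and a 4-byte/4-aligned global
  return estimateSize(100, {{8, 8}, {4, 4}}) == 116 ? 0 : 1;
}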
@@ -1136,7 +1136,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); MR.setGOTIndex(idx); if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { - DEBUG(errs() << "JIT: GOT was out of date for " << ResultPtr + DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n"); ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; @@ -1153,7 +1153,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (MemMgr->isManagingGOT()) { unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { - DEBUG(errs() << "JIT: GOT was out of date for " << (void*)BufferBegin + DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n"); ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; @@ -1182,7 +1182,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, EmissionDetails); - DEBUG(errs() << "JIT: Finished CodeGen of [" << (void*)FnStart + DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart << "] Function: " << F.getFunction()->getName() << ": " << (FnEnd-FnStart) << " bytes of text, " << Relocations.size() << " relocations\n"); @@ -1195,31 +1195,31 @@ bool JITEmitter::finishFunction(MachineFunction &F) { DEBUG( if (sys::hasDisassembler()) { - errs() << "JIT: Disassembled code:\n"; - errs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, + dbgs() << "JIT: Disassembled code:\n"; + dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, (uintptr_t)FnStart); } else { - errs() << "JIT: Binary code:\n"; + dbgs() << "JIT: Binary code:\n"; uint8_t* q = FnStart; for (int i = 0; q < FnEnd; q += 4, ++i) { if (i == 4) i = 0; if (i == 0) - errs() << "JIT: " << (long)(q - FnStart) << ": "; + dbgs() << "JIT: " << (long)(q - FnStart) << ": "; bool Done = false; for (int j = 3; j >= 0; --j) { if (q + j >= FnEnd) Done = true; else - errs() << (unsigned short)q[j]; + dbgs() << (unsigned short)q[j]; } if (Done) break; - errs() << ' '; + dbgs() << ' '; if (i == 3) - errs() << '\n'; + dbgs() << '\n'; } - errs()<< '\n'; + dbgs()<< '\n'; } ); @@ -1268,7 +1268,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } void JITEmitter::retryWithMoreMemory(MachineFunction &F) { - DEBUG(errs() << "JIT: Ran out of space for native code. Reattempting.\n"); + DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n"); Relocations.clear(); // Clear the old relocations or we'll reapply them. ConstPoolAddresses.clear(); ++NumRetries; @@ -1319,7 +1319,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { // in the JITResolver. Were there a memory manager deallocateStub routine, // we could call that at this point too. if (FnRefs.empty()) { - DEBUG(errs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); + DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); StubFnRefs.erase(Stub); // Invalidate the stub. If it is a GV stub, update the JIT's global @@ -1365,7 +1365,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { if (ConstantPoolBase == 0) return; // Buffer overflow. 
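Two pieces of GOT bookkeeping appear in this file's hunks: getGOTIndexForAddr lazily assigns a slot the first time an address is seen, and finishFunction refreshes any slot that no longer points at its address (the "GOT was out of date" case above). A self-contained toy of both steps, with the data structures simplified for the sketch:

#include <map>
#include <vector>

// Toy GOT: addresses get a lazily assigned slot; after relocation the slot
// is repointed if it went stale.
struct GOT {
  std::vector<void*> Slots{nullptr}; // slot 0 unused, indices start at 1
  std::map<void*, unsigned> RevMap;  // address -> existing slot

  unsigned indexForAddr(void *Addr) {
    unsigned &Idx = RevMap[Addr];
    if (!Idx) {                // first time we see this address
      Idx = Slots.size();
      Slots.push_back(nullptr);
    }
    return Idx;
  }

  void update(void *Addr) {
    unsigned Idx = indexForAddr(Addr);
    if (Slots[Idx] != Addr)    // "GOT was out of date", repoint the entry
      Slots[Idx] = Addr;
  }
};

int main() {
  GOT G;
  int X;
  G.update(&X);
  return G.Slots[G.indexForAddr(&X)] == &X ? 0 : 1;
}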
- DEBUG(errs() << "JIT: Emitted constant pool at [" << ConstantPoolBase + DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase << "] (size: " << Size << ", alignment: " << Align << ")\n"); // Initialize the memory for all of the constant pool entries. @@ -1383,8 +1383,8 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { "entry has not been implemented!"); } TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); - DEBUG(errs() << "JIT: CP" << i << " at [0x"; - errs().write_hex(CAddr) << "]\n"); + DEBUG(dbgs() << "JIT: CP" << i << " at [0x"; + dbgs().write_hex(CAddr) << "]\n"); const Type *Ty = CPE.Val.ConstVal->getType(); Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty); diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 80cb999..a17caa1 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -352,7 +352,7 @@ namespace { // another block of memory and add it to the free list. if (largest < ActualSize || largest <= FreeRangeHeader::getMinBlockSize()) { - DEBUG(errs() << "JIT: Allocating another slab of memory for function."); + DEBUG(dbgs() << "JIT: Allocating another slab of memory for function."); candidateBlock = allocateNewCodeSlab((size_t)ActualSize); } diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index 52a8f71..d01c4b2 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -50,9 +50,9 @@ OProfileJITEventListener::OProfileJITEventListener() : Agent(op_open_agent()) { if (Agent == NULL) { const std::string err_str = sys::StrError(); - DEBUG(errs() << "Failed to connect to OProfile agent: " << err_str << "\n"); + DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n"); } else { - DEBUG(errs() << "Connected to OProfile agent.\n"); + DEBUG(dbgs() << "Connected to OProfile agent.\n"); } } @@ -60,10 +60,10 @@ OProfileJITEventListener::~OProfileJITEventListener() { if (Agent != NULL) { if (op_close_agent(Agent) == -1) { const std::string err_str = sys::StrError(); - DEBUG(errs() << "Failed to disconnect from OProfile agent: " + DEBUG(dbgs() << "Failed to disconnect from OProfile agent: " << err_str << "\n"); } else { - DEBUG(errs() << "Disconnected from OProfile agent.\n"); + DEBUG(dbgs() << "Disconnected from OProfile agent.\n"); } } } @@ -92,7 +92,7 @@ static debug_line_info LineStartToOProfileFormat( const DebugLocTuple &tuple = MF.getDebugLocTuple(Loc); Result.lineno = tuple.Line; Result.filename = Filenames.getFilename(tuple.Scope); - DEBUG(errs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to " + DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to " << Result.filename << ":" << Result.lineno << "\n"); return Result; } @@ -105,7 +105,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (op_write_native_code(Agent, F.getName().data(), reinterpret_cast<uint64_t>(FnStart), FnStart, FnSize) == -1) { - DEBUG(errs() << "Failed to tell OProfile about native function " + DEBUG(dbgs() << "Failed to tell OProfile about native function " << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); return; @@ -133,7 +133,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (!LineInfo.empty()) { if (op_write_debug_line_info(Agent, FnStart, LineInfo.size(), &*LineInfo.begin()) == -1) { - DEBUG(errs() + 
DEBUG(dbgs() << "Failed to tell OProfile about line numbers for native function " << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); @@ -145,7 +145,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { assert(FnStart && "Invalid function pointer"); if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) { - DEBUG(errs() + DEBUG(dbgs() << "Failed to tell OProfile about unload of native function at " << FnStart << "\n"); } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 104cbe9..dcd696c 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -25,6 +25,7 @@ #include "llvm/ValueSymbolTable.h" #include "llvm/Instructions.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" @@ -144,7 +145,7 @@ protected: // for debugging... virtual void dump() const { - errs() << "AbstractTypeSet!\n"; + dbgs() << "AbstractTypeSet!\n"; } }; } @@ -337,11 +338,11 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) { static void PrintMap(const std::map<const Value*, Value*> &M) { for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end(); I != E; ++I) { - errs() << " Fr: " << (void*)I->first << " "; + dbgs() << " Fr: " << (void*)I->first << " "; I->first->dump(); - errs() << " To: " << (void*)I->second << " "; + dbgs() << " To: " << (void*)I->second << " "; I->second->dump(); - errs() << "\n"; + dbgs() << "\n"; } } #endif @@ -404,10 +405,10 @@ static Value *RemapOperand(const Value *In, } #ifndef NDEBUG - errs() << "LinkModules ValueMap: \n"; + dbgs() << "LinkModules ValueMap: \n"; PrintMap(ValueMap); - errs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n"; + dbgs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n"; llvm_unreachable("Couldn't remap value!"); #endif return 0; @@ -854,9 +855,14 @@ static bool LinkAlias(Module *Dest, const Module *Src, } else { // No linking to be performed, simply create an identical version of the // alias over in the dest module... - + Constant *Aliasee = DAliasee; + // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken + // by this, but aliases to GEPs are broken to a lot of other things, so + // it's less important. + if (SGA->getType() != DAliasee->getType()) + Aliasee = ConstantExpr::getBitCast(DAliasee, SGA->getType()); NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), - SGA->getName(), DAliasee, Dest); + SGA->getName(), Aliasee, Dest); CopyGVAttributes(NewGA, SGA); // Proceed to 'common' steps @@ -1222,9 +1228,15 @@ static bool LinkAppendingVars(Module *M, static bool ResolveAliases(Module *Dest) { for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end(); I != E; ++I) - if (const GlobalValue *GV = I->resolveAliasedGlobal()) - if (GV != I && !GV->isDeclaration()) - I->replaceAllUsesWith(const_cast<GlobalValue*>(GV)); + // We can't sue resolveGlobalAlias here because we need to preserve + // bitcasts and GEPs. 
+ if (const Constant *C = I->getAliasee()) { + while (dyn_cast<GlobalAlias>(C)) + C = cast<GlobalAlias>(C)->getAliasee(); + const GlobalValue *GV = dyn_cast<GlobalValue>(C); + if (C != I && !(GV && GV->isDeclaration())) + I->replaceAllUsesWith(const_cast<Constant*>(C)); + } return false; } diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index a5a2256..a19ec19 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -108,8 +109,8 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCExpr::dump() const { - print(errs(), 0); - errs() << '\n'; + print(dbgs(), 0); + dbgs() << '\n'; } /* *** */ diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index d050318..7c7a644 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -31,8 +32,8 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCOperand::dump() const { - print(errs(), 0); - errs() << "\n"; + print(dbgs(), 0); + dbgs() << "\n"; } void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { @@ -45,6 +46,6 @@ void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCInst::dump() const { - print(errs(), 0); - errs() << "\n"; + print(dbgs(), 0); + dbgs() << "\n"; } diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index c6812ed..4d520ec 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/MC/MCAsmInfo.h" - using namespace llvm; MCSectionELF *MCSectionELF:: @@ -23,7 +23,7 @@ Create(StringRef Section, unsigned Type, unsigned Flags, // ShouldOmitSectionDirective - Decides whether a '.section' directive // should be printed before the section name bool MCSectionELF::ShouldOmitSectionDirective(const char *Name, - const MCAsmInfo &MAI) const { + const MCAsmInfo &MAI) const { // FIXME: Does .section .bss/.data/.text work everywhere?? if (strcmp(Name, ".text") == 0 || @@ -37,7 +37,6 @@ bool MCSectionELF::ShouldOmitSectionDirective(const char *Name, // ShouldPrintSectionType - Only prints the section type if supported bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const { - if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS)) return false; diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index b145d07..265d06c 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -51,11 +52,14 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { return false; } -static void PrintMangledName(raw_ostream &OS, StringRef Str, - const MCAsmInfo &MAI) { +/// printMangledName - Print the specified string in mangled form if it uses +/// any unusual characters. 
+void MCSymbol::printMangledName(StringRef Str, raw_ostream &OS, + const MCAsmInfo *MAI) { // The first character is not allowed to be a number unless the target // explicitly allows it. - if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') { + if ((MAI == 0 || !MAI->doesAllowNameToStartWithDigit()) && + Str[0] >= '0' && Str[0] <= '9') { MangleLetter(OS, Str[0]); Str = Str.substr(1); } @@ -94,7 +98,7 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const { // On systems that do not allow quoted names, print with mangling. if (!MAI->doesAllowQuotesInName()) - return PrintMangledName(OS, getName(), *MAI); + return printMangledName(getName(), OS, MAI); // If the string contains a double quote or newline, we still have to mangle // it. @@ -106,5 +110,5 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCSymbol::dump() const { - print(errs(), 0); + print(dbgs(), 0); } diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp index 69bd10c..c1222ec 100644 --- a/lib/MC/MCValue.cpp +++ b/lib/MC/MCValue.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -30,5 +31,5 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { } void MCValue::dump() const { - print(errs(), 0); + print(dbgs(), 0); } diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 9532e1e..9d14684 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1580,12 +1580,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, uint64_t b = uint64_t(1) << 32; #if 0 - DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); - DEBUG(errs() << "KnuthDiv: original:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << " by"); - DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); + DEBUG(dbgs() << "KnuthDiv: original:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << " by"); + DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); + DEBUG(dbgs() << '\n'); #endif // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of // u and v by d. Note that we have taken Knuth's advice here to use a power @@ -1612,17 +1612,17 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[m+n] = u_carry; #if 0 - DEBUG(errs() << "KnuthDiv: normal:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << " by"); - DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: normal:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << " by"); + DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); + DEBUG(dbgs() << '\n'); #endif // D2. [Initialize j.] Set j to m. This is the loop counter over the places. int j = m; do { - DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n'); + DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); // D3. [Calculate q'.]. // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') @@ -1632,7 +1632,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // value qp is one too large, and it eliminates all cases where qp is two // too large. 
uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); - DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n'); + DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); uint64_t qp = dividend / v[n-1]; uint64_t rp = dividend % v[n-1]; if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { @@ -1641,7 +1641,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) qp--; } - DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation @@ -1652,7 +1652,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); bool borrow = subtrahend > u_tmp; - DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp + DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp << ", subtrahend == " << subtrahend << ", borrow = " << borrow << '\n'); @@ -1666,12 +1666,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, k++; } isNeg |= borrow; - DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << u[j+i+1] << '\n'); } - DEBUG(errs() << "KnuthDiv: after subtraction:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << '\n'); // The digits (u[j+n]...u[j]) should be kept positive; if the result of // this step is actually negative, (u[j+n]...u[j]) should be left as the // true value plus b**(n+1), namely as the b's complement of @@ -1684,9 +1684,9 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, carry = carry && u[i] == 0; } } - DEBUG(errs() << "KnuthDiv: after complement:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << '\n'); // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was // negative, go to step D6; otherwise go on to step D7. @@ -1707,16 +1707,16 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[j+n] += carry; } - DEBUG(errs() << "KnuthDiv: after correction:"); - DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]); - DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); + DEBUG(dbgs() << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]); + DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. } while (--j >= 0); - DEBUG(errs() << "KnuthDiv: quotient:"); - DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]); - DEBUG(errs() << '\n'); + DEBUG(dbgs() << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]); + DEBUG(dbgs() << '\n'); // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired // remainder may be obtained by dividing u[...] by d. If r is non-null we @@ -1727,22 +1727,22 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // shift right here. 
In order to mak if (shift) { unsigned carry = 0; - DEBUG(errs() << "KnuthDiv: remainder:"); + DEBUG(dbgs() << "KnuthDiv: remainder:"); for (int i = n-1; i >= 0; i--) { r[i] = (u[i] >> shift) | carry; carry = u[i] << (32 - shift); - DEBUG(errs() << " " << r[i]); + DEBUG(dbgs() << " " << r[i]); } } else { for (int i = n-1; i >= 0; i--) { r[i] = u[i]; - DEBUG(errs() << " " << r[i]); + DEBUG(dbgs() << " " << r[i]); } } - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); } #if 0 - DEBUG(errs() << '\n'); + DEBUG(dbgs() << '\n'); #endif } @@ -2191,7 +2191,7 @@ void APInt::dump() const { SmallString<40> S, U; this->toStringUnsigned(U); this->toStringSigned(S); - errs() << "APInt(" << BitWidth << "b, " + dbgs() << "APInt(" << BitWidth << "b, " << U.str() << "u " << S.str() << "s)"; } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index b6c0e08..fa692be8 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -354,7 +354,7 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, // we don't need to pass argc/argv in. assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && "Option can not be cl::Grouping AND cl::ValueRequired!"); - int Dummy; + int Dummy = 0; ErrorParsing |= ProvideOption(PGOpt, OneArgName, StringRef(), 0, 0, Dummy); @@ -778,10 +778,10 @@ void cl::ParseCommandLineOptions(int argc, char **argv, free(*i); } - DEBUG(errs() << "Args: "; + DEBUG(dbgs() << "Args: "; for (int i = 0; i < argc; ++i) - errs() << argv[i] << ' '; - errs() << '\n'; + dbgs() << argv[i] << ' '; + dbgs() << '\n'; ); // If we had an error processing our arguments, don't let the program execute diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index e427f82..ddf14e3 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -22,6 +22,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Instructions.h" using namespace llvm; @@ -655,7 +656,7 @@ void ConstantRange::print(raw_ostream &OS) const { /// dump - Allow printing from a debugger easily... /// void ConstantRange::dump() const { - print(errs()); + print(dbgs()); } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index dff4f03..8bb1566 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Threading.h" @@ -62,11 +63,11 @@ void llvm_unreachable_internal(const char *msg, const char *file, // llvm_unreachable is intended to be used to indicate "impossible" // situations, and not legitimate runtime errors. 
if (msg) - errs() << msg << "\n"; - errs() << "UNREACHABLE executed"; + dbgs() << msg << "\n"; + dbgs() << "UNREACHABLE executed"; if (file) - errs() << " at " << file << ":" << line; - errs() << "!\n"; + dbgs() << " at " << file << ":" << line; + dbgs() << "!\n"; abort(); } } diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 70f2cfa..9ab3666 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; @@ -91,3 +92,10 @@ formatted_raw_ostream &llvm::ferrs() { static formatted_raw_ostream S(errs()); return S; } + +/// fdbgs() - This returns a reference to a formatted_raw_ostream for +/// the debug stream. Use it like: fdbgs() << "foo" << "bar"; +formatted_raw_ostream &llvm::fdbgs() { + static formatted_raw_ostream S(dbgs()); + return S; +} diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 14f94bc..e787670 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" @@ -127,6 +128,6 @@ StatisticInfo::~StatisticInfo() { OutStream << '\n'; // Flush the output stream... OutStream.flush(); - if (&OutStream != &outs() && &OutStream != &errs()) + if (&OutStream != &outs() && &OutStream != &errs() && &OutStream != &dbgs()) delete &OutStream; // Close the file. } diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index 1b233ab..785e0ec 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -11,50 +11,53 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallVector.h" -#include <cstring> +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; +/// StrInStrNoCase - Portable version of strcasestr. Locates the first +/// occurrence of string 's1' in string 's2', ignoring case. Returns +/// the offset of s2 in s1 or npos if s2 cannot be found. +StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) { + size_t N = s2.size(), M = s1.size(); + if (N > M) + return StringRef::npos; + for (size_t i = 0, e = M - N + 1; i != e; ++i) + if (s1.substr(i, N).equals_lower(s2)) + return i; + return StringRef::npos; +} + /// getToken - This function extracts one token from source, ignoring any /// leading characters that appear in the Delimiters string, and ending the /// token at any of the characters that appear in the Delimiters string. If /// there are no tokens in the source string, an empty string is returned. -/// The Source source string is updated in place to remove the returned string -/// and any delimiter prefix from it. -std::string llvm::getToken(std::string &Source, const char *Delimiters) { - size_t NumDelimiters = std::strlen(Delimiters); - +/// The function returns a pair containing the extracted token and the +/// remaining tail string. +std::pair<StringRef, StringRef> llvm::getToken(StringRef Source, + StringRef Delimiters) { // Figure out where the token starts. 
- std::string::size_type Start = - Source.find_first_not_of(Delimiters, 0, NumDelimiters); - if (Start == std::string::npos) Start = Source.size(); - - // Find the next occurance of the delimiter. - std::string::size_type End = - Source.find_first_of(Delimiters, Start, NumDelimiters); - if (End == std::string::npos) End = Source.size(); - - // Create the return token. - std::string Result = std::string(Source.begin()+Start, Source.begin()+End); + StringRef::size_type Start = Source.find_first_not_of(Delimiters); + if (Start == StringRef::npos) Start = Source.size(); - // Erase the token that we read in. - Source.erase(Source.begin(), Source.begin()+End); + // Find the next occurrence of the delimiter. + StringRef::size_type End = Source.find_first_of(Delimiters, Start); + if (End == StringRef::npos) End = Source.size(); - return Result; + return std::make_pair(Source.substr(Start, End), Source.substr(End)); } /// SplitString - Split up the specified string according to the specified /// delimiters, appending the result fragments to the output list. -void llvm::SplitString(const std::string &Source, - std::vector<std::string> &OutFragments, - const char *Delimiters) { - std::string S = Source; - - std::string S2 = getToken(S, Delimiters); +void llvm::SplitString(StringRef Source, + SmallVectorImpl<StringRef> &OutFragments, + StringRef Delimiters) { + StringRef S2, S; + tie(S2, S) = getToken(Source, Delimiters); while (!S2.empty()) { OutFragments.push_back(S2); - S2 = getToken(S, Delimiters); + tie(S2, S) = getToken(S, Delimiters); } } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index e4a9984..ae2640b 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallVector.h" + using namespace llvm; // MSVC emits references to this into the translation units which reference it. 
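An aside on the StringExtras.cpp hunk above, before the StringRef.cpp hunks begin: getToken now returns a (token, tail) pair instead of erasing the consumed prefix from a mutable std::string. A minimal sketch of a caller (printWords is hypothetical, not part of this patch), showing both the tie() idiom that the rewritten SplitString uses and SplitString itself:

#include "llvm/ADT/STLExtras.h"        // llvm::tie
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printWords(StringRef Line) {
  // Manual tokenization: each call peels one token off the front and
  // hands back the unconsumed tail; nothing is copied or mutated.
  StringRef Tok, Rest = Line;
  for (;;) {
    tie(Tok, Rest) = getToken(Rest, " \t");
    if (Tok.empty())
      break;
    outs() << Tok << '\n';
  }

  // Or in one shot, with the StringRef-based SplitString:
  SmallVector<StringRef, 8> Words;
  SplitString(Line, Words, " \t");
}

One caveat in the new getToken: StringRef::substr takes a length, not an end index, so substr(Start, End) yields the intended [Start, End) token only when Start is zero, and a token preceded by delimiters drags extra trailing characters along. slice(Start, End), which does take an end index, would express the intent exactly.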
@@ -51,13 +51,18 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, size_type m = size(); size_type n = Other.size(); - SmallVector<unsigned, 32> previous(n+1, 0); - for (SmallVector<unsigned, 32>::size_type i = 0; i <= n; ++i) + const unsigned SmallBufferSize = 64; + unsigned SmallBuffer[SmallBufferSize]; + unsigned *Allocated = 0; + unsigned *previous = SmallBuffer; + if (2*(n + 1) > SmallBufferSize) + Allocated = previous = new unsigned [2*(n+1)]; + unsigned *current = previous + (n + 1); + + for (unsigned i = 0; i <= n; ++i) previous[i] = i; - SmallVector<unsigned, 32> current(n+1, 0); for (size_type y = 1; y <= m; ++y) { - current.assign(n+1, 0); current[0] = y; for (size_type x = 1; x <= n; ++x) { if (AllowReplacements) { @@ -69,10 +74,16 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, else current[x] = min(current[x-1], previous[x]) + 1; } } - current.swap(previous); + + unsigned *tmp = current; + current = previous; + previous = tmp; } - return previous[n]; + unsigned Result = previous[n]; + delete [] Allocated; + + return Result; } //===----------------------------------------------------------------------===// diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 7d32ee6..4bdfac2 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" @@ -373,7 +374,7 @@ void TimerGroup::removeTimer() { TimersToPrint.clear(); - if (OutStream != &errs() && OutStream != &outs()) + if (OutStream != &errs() && OutStream != &outs() && OutStream != &dbgs()) delete OutStream; // Close the file... } } diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp index 292c0c2..21504e9 100644 --- a/lib/Support/Twine.cpp +++ b/lib/Support/Twine.cpp @@ -9,13 +9,13 @@ #include "llvm/ADT/Twine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; std::string Twine::str() const { SmallString<256> Vec; - toVector(Vec); - return std::string(Vec.begin(), Vec.end()); + return toStringRef(Vec).str(); } void Twine::toVector(SmallVectorImpl<char> &Out) const { @@ -23,6 +23,13 @@ void Twine::toVector(SmallVectorImpl<char> &Out) const { print(OS); } +StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const { + if (isSingleStringRef()) + return getSingleStringRef(); + toVector(Out); + return StringRef(Out.data(), Out.size()); +} + void Twine::printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const { switch (Kind) { @@ -125,9 +132,9 @@ void Twine::printRepr(raw_ostream &OS) const { } void Twine::dump() const { - print(llvm::errs()); + print(llvm::dbgs()); } void Twine::dumpRepr() const { - printRepr(llvm::errs()); + printRepr(llvm::dbgs()); } diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc index 10e64aa..c9a89e5 100644 --- a/lib/System/Win32/DynamicLibrary.inc +++ b/lib/System/Win32/DynamicLibrary.inc @@ -79,7 +79,7 @@ extern "C" { // Mingw32 uses msvcrt.dll by default. Don't ignore it. 
// Otherwise, user should be aware, what he's doing :) stricmp(ModuleName, "msvcrt") != 0 && -#endif +#endif stricmp(ModuleName, "msvcrt20") != 0 && stricmp(ModuleName, "msvcrt40") != 0) { OpenedHandles.push_back((HMODULE)ModuleBase); @@ -119,24 +119,24 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename, extern "C" { extern void *SYM; } #if defined(__MINGW32__) - EXPLICIT_SYMBOL_DEF(_alloca); - EXPLICIT_SYMBOL_DEF(__main); - EXPLICIT_SYMBOL_DEF(__ashldi3); - EXPLICIT_SYMBOL_DEF(__ashrdi3); - EXPLICIT_SYMBOL_DEF(__cmpdi2); - EXPLICIT_SYMBOL_DEF(__divdi3); - EXPLICIT_SYMBOL_DEF(__fixdfdi); - EXPLICIT_SYMBOL_DEF(__fixsfdi); - EXPLICIT_SYMBOL_DEF(__fixunsdfdi); - EXPLICIT_SYMBOL_DEF(__fixunssfdi); - EXPLICIT_SYMBOL_DEF(__floatdidf); - EXPLICIT_SYMBOL_DEF(__floatdisf); - EXPLICIT_SYMBOL_DEF(__lshrdi3); - EXPLICIT_SYMBOL_DEF(__moddi3); - EXPLICIT_SYMBOL_DEF(__udivdi3); - EXPLICIT_SYMBOL_DEF(__umoddi3); + EXPLICIT_SYMBOL_DEF(_alloca) + EXPLICIT_SYMBOL_DEF(__main) + EXPLICIT_SYMBOL_DEF(__ashldi3) + EXPLICIT_SYMBOL_DEF(__ashrdi3) + EXPLICIT_SYMBOL_DEF(__cmpdi2) + EXPLICIT_SYMBOL_DEF(__divdi3) + EXPLICIT_SYMBOL_DEF(__fixdfdi) + EXPLICIT_SYMBOL_DEF(__fixsfdi) + EXPLICIT_SYMBOL_DEF(__fixunsdfdi) + EXPLICIT_SYMBOL_DEF(__fixunssfdi) + EXPLICIT_SYMBOL_DEF(__floatdidf) + EXPLICIT_SYMBOL_DEF(__floatdisf) + EXPLICIT_SYMBOL_DEF(__lshrdi3) + EXPLICIT_SYMBOL_DEF(__moddi3) + EXPLICIT_SYMBOL_DEF(__udivdi3) + EXPLICIT_SYMBOL_DEF(__umoddi3) #elif defined(_MSC_VER) - EXPLICIT_SYMBOL_DEF(_alloca_probe); + EXPLICIT_SYMBOL_DEF(_alloca_probe) #endif #endif @@ -181,7 +181,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { EXPLICIT_SYMBOL2(alloca, _alloca); #undef EXPLICIT_SYMBOL #undef EXPLICIT_SYMBOL2 -#undef EXPLICIT_SYMBOL_DEF +#undef EXPLICIT_SYMBOL_DEF } #elif defined(_MSC_VER) { @@ -189,8 +189,8 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { EXPLICIT_SYMBOL2(_alloca, _alloca_probe); #undef EXPLICIT_SYMBOL #undef EXPLICIT_SYMBOL2 -#undef EXPLICIT_SYMBOL_DEF - } +#undef EXPLICIT_SYMBOL_DEF + } #endif return 0; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 7cfa097..969c4a4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -938,6 +938,35 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, return false; } +/// Create a copy of a const pool value. Update CPI to the new index and return +/// the label UID. 
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { + MachineConstantPool *MCP = MF.getConstantPool(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; + assert(MCPE.isMachineConstantPoolEntry() && + "Expecting a machine constantpool entry!"); + ARMConstantPoolValue *ACPV = + static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); + + unsigned PCLabelId = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *NewCPV = 0; + if (ACPV->isGlobalValue()) + NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, + ARMCP::CPValue, 4); + else if (ACPV->isExtSymbol()) + NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), + ACPV->getSymbol(), PCLabelId, 4); + else if (ACPV->isBlockAddress()) + NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, + ARMCP::CPBlockAddress, 4); + else + llvm_unreachable("Unexpected ARM constantpool value type!!"); + CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + return PCLabelId; +} + void ARMBaseInstrInfo:: reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -960,28 +989,8 @@ reMaterialize(MachineBasicBlock &MBB, case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - MachineConstantPool *MCP = MF.getConstantPool(); unsigned CPI = Orig->getOperand(1).getIndex(); - const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; - assert(MCPE.isMachineConstantPoolEntry() && - "Expecting a machine constantpool entry!"); - ARMConstantPoolValue *ACPV = - static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); - unsigned PCLabelId = AFI->createConstPoolEntryUId(); - ARMConstantPoolValue *NewCPV = 0; - if (ACPV->isGlobalValue()) - NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, - ARMCP::CPValue, 4); - else if (ACPV->isExtSymbol()) - NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), - ACPV->getSymbol(), PCLabelId, 4); - else if (ACPV->isBlockAddress()) - NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, - ARMCP::CPBlockAddress, 4); - else - llvm_unreachable("Unexpected ARM constantpool value type!!"); - CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + unsigned PCLabelId = duplicateCPV(MF, CPI); MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), DestReg) .addConstantPoolIndex(CPI).addImm(PCLabelId); @@ -994,6 +1003,22 @@ reMaterialize(MachineBasicBlock &MBB, NewMI->getOperand(0).setSubReg(SubIdx); } +MachineInstr * +ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { + MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF); + switch(Orig->getOpcode()) { + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + unsigned CPI = Orig->getOperand(1).getIndex(); + unsigned PCLabelId = duplicateCPV(MF, CPI); + Orig->getOperand(1).setIndex(CPI); + Orig->getOperand(2).setImm(PCLabelId); + break; + } + } + return MI; +} + bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 78d9135..0d9d4a7 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -287,6 +287,8 @@ public: const MachineInstr *Orig, const TargetRegisterInfo *TRI) const; + MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; + virtual bool 
isIdentical(const MachineInstr *MI, const MachineInstr *Other, const MachineRegisterInfo *MRI) const; }; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7aebdf4..f1b6e1d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -217,7 +217,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; } -BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const { +BitVector ARMBaseRegisterInfo:: +getReservedRegs(const MachineFunction &MF) const { // FIXME: avoid re-calculating this everytime. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); @@ -494,7 +495,8 @@ needsStackRealignment(const MachineFunction &MF) const { !MFI->hasVarSizedObjects()); } -bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { +bool ARMBaseRegisterInfo:: +cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); if (NoFramePointerElim && MFI->hasCalls()) return true; @@ -523,7 +525,7 @@ static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these -/// instructions will require scratch register during their expansion later. +/// instructions will require a scratch register during their expansion later. unsigned ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { unsigned Limit = (1 << 12) - 1; @@ -547,6 +549,9 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { // When the stack offset is negative, we will end up using // the i8 instructions instead. return (1 << 8) - 1; + + if (AddrMode == ARMII::AddrMode6) + return 0; break; // At most one FI per instruction } } @@ -557,7 +562,7 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { void ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *RS) const { // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -852,7 +857,7 @@ int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { } unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, - const MachineFunction &MF) const { + const MachineFunction &MF) const { switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. 
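The ARMISelDAGToDAG.cpp diff that follows is bulky but almost entirely mechanical: every selection hook now receives the SDNode* being matched rather than an SDValue wrapping it. An SDValue is just an (SDNode*, result number) pair, and these hooks always acted on result 0, so the translation is a handful of accessor swaps. A hypothetical hook (SelectExample and TargetOpc are illustrative, not from the patch) with the old spellings kept in comments:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// TargetOpc stands in for a real machine opcode such as ARM::UBFX.
static SDNode *SelectExample(SelectionDAG *CurDAG, SDNode *N,
                             unsigned TargetOpc) {
  if (N->getValueType(0) != MVT::i32)    // was: Op.getValueType()
    return NULL;                         // defer to autogenerated matching
  SDValue Ops[] = { N->getOperand(0),    // was: Op.getOperand(0)
                    N->getOperand(1) };  // was: Op.getOperand(1)
  // Node morphing passes N itself where Op.getNode() was required before:
  return CurDAG->SelectNodeTo(N, TargetOpc, MVT::i32, Ops, 2);
}

Call sites change in tandem: Select(Op) becomes Select(N), and replacements are phrased as ReplaceUses(SDValue(N, 0), ...), exactly the pattern repeated through the hunks below.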
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index d63f3e6..14a45b3 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -64,53 +64,53 @@ public: return CurDAG->getTargetConstant(Imm, MVT::i32); } - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); virtual void InstructionSelect(); - bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, + bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); - bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode2Offset(SDValue Op, SDValue N, + bool SelectAddrMode2Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode3Offset(SDValue Op, SDValue N, + bool SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode4(SDValue Op, SDValue N, SDValue &Addr, + bool SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Mode); - bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, SDValue &Opc, SDValue &Align); - bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, + bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); - bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale, + bool SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, + bool SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2ShifterOperandReg(SDValue Op, SDValue N, + bool SelectT2ShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &Opc); - bool SelectT2AddrModeImm12(SDValue Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2AddrModeImm8(SDValue Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, + bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm); - bool SelectT2AddrModeImm8s4(SDValue Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectT2AddrModeSoReg(SDValue Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeSoReg(SDNode 
*Op, SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); // Include the pieces autogenerated from the target description. @@ -119,48 +119,48 @@ public: private: /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for /// ARM. - SDNode *SelectARMIndexedLoad(SDValue Op); - SDNode *SelectT2IndexedLoad(SDValue Op); + SDNode *SelectARMIndexedLoad(SDNode *N); + SDNode *SelectT2IndexedLoad(SDNode *N); /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. - SDNode *SelectDYN_ALLOC(SDValue Op); + SDNode *SelectDYN_ALLOC(SDNode *N); /// SelectVLD - Select NEON load intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. /// For NumVecs == 2, QOpcodes1 is not used. - SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes, + SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVST - Select NEON store intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// stores of D registers and even subregs and odd subregs of Q registers. /// For NumVecs == 2, QOpcodes1 is not used. - SDNode *SelectVST(SDValue Op, unsigned NumVecs, unsigned *DOpcodes, + SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// load/store of D registers and even subregs and odd subregs of Q registers. - SDNode *SelectVLDSTLane(SDValue Op, bool IsLoad, unsigned NumVecs, + SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. - SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc); + SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc); /// SelectCMOVOp - Select CMOV instructions for ARM. 
- SDNode *SelectCMOVOp(SDValue Op); - SDNode *SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectCMOVOp(SDNode *N); + SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -206,7 +206,7 @@ void ARMDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } -bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &ShReg, @@ -230,7 +230,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, return true; } -bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL) { @@ -340,9 +340,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc) { - unsigned Opcode = Op.getOpcode(); + unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) ? cast<LoadSDNode>(Op)->getAddressingMode() : cast<StoreSDNode>(Op)->getAddressingMode(); @@ -379,7 +379,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N, } -bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::SUB) { @@ -429,9 +429,9 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc) { - unsigned Opcode = Op.getOpcode(); + unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) ? 
cast<LoadSDNode>(Op)->getAddressingMode() : cast<StoreSDNode>(Op)->getAddressingMode(); @@ -451,14 +451,14 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode4(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Mode) { Addr = N; Mode = CurDAG->getTargetConstant(0, MVT::i32); return true; } -bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset) { if (N.getOpcode() != ISD::ADD) { Base = N; @@ -506,7 +506,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, SDValue &Opc, SDValue &Align) { Addr = N; @@ -518,7 +518,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { Offset = N.getOperand(0); @@ -530,10 +530,10 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset){ // FIXME dl should come from the parent load or store, not the address - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = Op->getDebugLoc(); if (N.getOpcode() != ISD::ADD) { ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); if (!NC || NC->getZExtValue() != 0) @@ -549,7 +549,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, } bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, +ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm, SDValue &Offset) { if (Scale == 4) { @@ -605,25 +605,25 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset) { return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset); } -bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset) { return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset); } -bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm, SDValue &Offset) { return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset); } -bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); @@ -659,7 +659,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &Opc) { ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); @@ -679,7 
+679,7 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R + imm12 operands. @@ -729,7 +729,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R - imm8 operands. if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) { @@ -753,9 +753,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm){ - unsigned Opcode = Op.getOpcode(); + unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) ? cast<LoadSDNode>(Op)->getAddressingMode() : cast<StoreSDNode>(Op)->getAddressingMode(); @@ -772,7 +772,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::ADD) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { @@ -798,7 +798,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm) { // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. @@ -854,8 +854,8 @@ static inline SDValue getAL(SelectionDAG *CurDAG) { return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); } -SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { - LoadSDNode *LD = cast<LoadSDNode>(Op); +SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) return NULL; @@ -866,23 +866,23 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { unsigned Opcode = 0; bool Match = false; if (LoadedVT == MVT::i32 && - SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; Match = true; } else if (LoadedVT == MVT::i16 && - SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { if (LD->getExtensionType() == ISD::SEXTLOAD) { - if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; } } else { - if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; Opcode = isPre ? 
ARM::LDRB_PRE : ARM::LDRB_POST; } @@ -894,15 +894,15 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, Ops, 6); } return NULL; } -SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { - LoadSDNode *LD = cast<LoadSDNode>(Op); +SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) return NULL; @@ -913,7 +913,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); unsigned Opcode = 0; bool Match = false; - if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) { + if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { switch (LoadedVT.getSimpleVT().SimpleTy) { case MVT::i32: Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; @@ -942,20 +942,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, Ops, 5); } return NULL; } -SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - EVT VT = Op.getValueType(); - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - SDValue Align = Op.getOperand(2); + EVT VT = N->getValueType(0); + SDValue Chain = N->getOperand(0); + SDValue Size = N->getOperand(1); + SDValue Align = N->getOperand(2); SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue(); if (AlignVal < 0) @@ -1030,15 +1029,14 @@ static EVT GetNEONSubregVT(EVT VT) { } } -SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, +SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); - SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1124,15 +1122,14 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, return NULL; } -SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, +SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); - SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1225,16 +1222,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, return NULL; } -SDNode 
*ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, +SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); - SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1324,38 +1320,38 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, return NULL; } -SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, +SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc) { if (!Subtarget->hasV6T2Ops()) return NULL; unsigned Shl_imm = 0; - if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)) { + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); unsigned Srl_imm = 0; - if (isInt32Immediate(Op.getOperand(1), Srl_imm)) { + if (isInt32Immediate(N->getOperand(1), Srl_imm)) { assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); unsigned Width = 32 - Srl_imm; int LSB = Srl_imm - Shl_imm; if (LSB < 0) return NULL; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { Op.getOperand(0).getOperand(0), + SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } return NULL; } SDNode *ARMDAGToDAGISel:: -SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { SDValue CPTmp0; SDValue CPTmp1; - if (SelectT2ShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1)) { + if (SelectT2ShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1)) { unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue(); unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); unsigned Opc = 0; @@ -1372,27 +1368,27 @@ SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6); } return 0; } SDNode *ARMDAGToDAGISel:: -SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { SDValue CPTmp0; SDValue CPTmp1; SDValue CPTmp2; - if (SelectShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + if (SelectShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1, CPTmp2)) { SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7); } return 0; } SDNode *ARMDAGToDAGISel:: -SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectT2CMOVSoImmOp(SDNode *N, 
SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); if (!T) @@ -1402,14 +1398,14 @@ SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), + return CurDAG->SelectNodeTo(N, ARM::t2MOVCCi, MVT::i32, Ops, 5); } return 0; } SDNode *ARMDAGToDAGISel:: -SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, +SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); if (!T) @@ -1419,19 +1415,19 @@ SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), + return CurDAG->SelectNodeTo(N, ARM::MOVCCi, MVT::i32, Ops, 5); } return 0; } -SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { - EVT VT = Op.getValueType(); - SDValue FalseVal = Op.getOperand(0); - SDValue TrueVal = Op.getOperand(1); - SDValue CC = Op.getOperand(2); - SDValue CCR = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); +SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue FalseVal = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue CC = N->getOperand(2); + SDValue CCR = N->getOperand(3); + SDValue InFlag = N->getOperand(4); assert(CC.getOpcode() == ISD::Constant); assert(CCR.getOpcode() == ISD::Register); ARMCC::CondCodes CCVal = @@ -1445,18 +1441,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { SDValue CPTmp1; SDValue CPTmp2; if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVShiftOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectT2CMOVShiftOp(Op, TrueVal, FalseVal, + Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; } else { - SDNode *Res = SelectARMCMOVShiftOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectARMCMOVShiftOp(Op, TrueVal, FalseVal, + Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; @@ -1469,18 +1465,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) // Pattern complexity = 10 cost = 1 size = 0 if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVSoImmOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectT2CMOVSoImmOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectT2CMOVSoImmOp(Op, TrueVal, FalseVal, + Res = SelectT2CMOVSoImmOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; } else { - SDNode *Res = SelectARMCMOVSoImmOp(Op, FalseVal, TrueVal, + SDNode *Res = SelectARMCMOVSoImmOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectARMCMOVSoImmOp(Op, TrueVal, FalseVal, + Res = SelectARMCMOVSoImmOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; @@ -1514,11 +1510,10 @@ SDNode 
*ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { Opc = ARM::VMOVDcc; break; } - return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } -SDNode *ARMDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); if (N->isMachineOpcode()) @@ -1569,7 +1564,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, Ops, 6); } - ReplaceUses(Op, SDValue(ResNode, 0)); + ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); return NULL; } @@ -1593,28 +1588,28 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } } case ARMISD::DYN_ALLOC: - return SelectDYN_ALLOC(Op); + return SelectDYN_ALLOC(N); case ISD::SRL: - if (SDNode *I = SelectV6T2BitfieldExtractOp(Op, + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)) return I; break; case ISD::SRA: - if (SDNode *I = SelectV6T2BitfieldExtractOp(Op, + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)) return I; break; case ISD::MUL: if (Subtarget->isThumb1Only()) break; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { unsigned RHSV = C->getZExtValue(); if (!RHSV) break; if (isPowerOf2_32(RHSV-1)) { // 2^n+1? unsigned ShImm = Log2_32(RHSV-1); if (ShImm >= 32) break; - SDValue V = Op.getOperand(0); + SDValue V = N->getOperand(0); ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); @@ -1630,7 +1625,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { unsigned ShImm = Log2_32(RHSV+1); if (ShImm >= 32) break; - SDValue V = Op.getOperand(0); + SDValue V = N->getOperand(0); ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); @@ -1650,7 +1645,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { // are entirely contributed by c2 and lower 16-bits are entirely contributed // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). // Select it to: "movt x, ((c1 & 0xffff) >> 16) - EVT VT = Op.getValueType(); + EVT VT = N->getValueType(0); if (VT != MVT::i32) break; unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) @@ -1658,7 +1653,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { : (Subtarget->hasV6T2Ops() ? 
ARM::MOVTi16 : 0); if (!Opc) break; - SDValue N0 = Op.getOperand(0), N1 = Op.getOperand(1); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (!N1C) break; @@ -1683,18 +1678,18 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } case ARMISD::VMOVRRD: return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, - Op.getOperand(0), getAL(CurDAG), + N->getOperand(0), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)); case ISD::UMUL_LOHI: { if (Subtarget->isThumb1Only()) break; if (Subtarget->isThumb()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4); } else { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); @@ -1704,11 +1699,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { if (Subtarget->isThumb1Only()) break; if (Subtarget->isThumb()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4); } else { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); @@ -1717,9 +1712,9 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case ISD::LOAD: { SDNode *ResNode = 0; if (Subtarget->isThumb() && Subtarget->hasThumb2()) - ResNode = SelectT2IndexedLoad(Op); + ResNode = SelectT2IndexedLoad(N); else - ResNode = SelectARMIndexedLoad(Op); + ResNode = SelectARMIndexedLoad(N); if (ResNode) return ResNode; // Other cases are autogenerated. @@ -1740,11 +1735,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { unsigned Opc = Subtarget->isThumb() ? ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; - SDValue Chain = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - SDValue N3 = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue InFlag = N->getOperand(4); assert(N1.getOpcode() == ISD::BasicBlock); assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); @@ -1756,23 +1751,23 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, MVT::Flag, Ops, 5); Chain = SDValue(ResNode, 0); - if (Op.getNode()->getNumValues() == 2) { + if (N->getNumValues() == 2) { InFlag = SDValue(ResNode, 1); - ReplaceUses(SDValue(Op.getNode(), 1), InFlag); + ReplaceUses(SDValue(N, 1), InFlag); } - ReplaceUses(SDValue(Op.getNode(), 0), + ReplaceUses(SDValue(N, 0), SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } case ARMISD::CMOV: - return SelectCMOVOp(Op); + return SelectCMOVOp(N); case ARMISD::CNEG: { - EVT VT = Op.getValueType(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - SDValue N3 = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue InFlag = N->getOperand(4); assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); @@ -1791,7 +1786,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { Opc = ARM::VNEGDcc; break; } - return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } case ARMISD::VZIP: { @@ -1863,7 +1858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, ARM::VLD2d32, ARM::VLD2d64 }; unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 }; - return SelectVLD(Op, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { @@ -1871,7 +1866,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VLD3d32, ARM::VLD3d64 }; unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a }; unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b }; - return SelectVLD(Op, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { @@ -1879,35 +1874,35 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VLD4d32, ARM::VLD4d64 }; unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a }; unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b }; - return SelectVLD(Op, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a }; unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b }; - return SelectVLDSTLane(Op, true, 2, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld3lane: { unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a }; unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b }; - return SelectVLDSTLane(Op, 
true, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4lane: { unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a }; unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b }; - return SelectVLDSTLane(Op, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2: { unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, ARM::VST2d32, ARM::VST2d64 }; unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; - return SelectVST(Op, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { @@ -1915,7 +1910,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VST3d32, ARM::VST3d64 }; unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a }; unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b }; - return SelectVST(Op, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { @@ -1923,34 +1918,34 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { ARM::VST4d32, ARM::VST4d64 }; unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a }; unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b }; - return SelectVST(Op, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 }; unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a }; unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b }; - return SelectVLDSTLane(Op, false, 2, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst3lane: { unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a }; unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b }; - return SelectVLDSTLane(Op, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4lane: { unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a }; unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b }; - return SelectVLDSTLane(Op, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } } } - return SelectCode(Op); + return SelectCode(N); } bool ARMDAGToDAGISel:: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 334baae..7b62c00 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3130,6 +3130,9 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // exitMBB: // ... BB = exitMBB; + + MF->DeleteMachineInstr(MI); // The instruction is gone now. 
+ return BB; } @@ -3140,7 +3143,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *F = BB->getParent(); + MachineFunction *MF = BB->getParent(); MachineFunction::iterator It = BB; ++It; @@ -3155,7 +3158,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); case 1: ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; break; case 2: ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; @@ -3167,13 +3170,13 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, break; } - MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, loopMBB); - F->insert(It, exitMBB); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); exitMBB->transferSuccessors(BB); - MachineRegisterInfo &RegInfo = F->getRegInfo(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); unsigned scratch2 = (!BinOpcode) ? incr : RegInfo.createVirtualRegister(ARM::GPRRegisterClass); @@ -3216,7 +3219,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // ... BB = exitMBB; - F->DeleteMachineInstr(MI); // The instruction is gone now. + MF->DeleteMachineInstr(MI); // The instruction is gone now. return BB; } @@ -4258,10 +4261,10 @@ std::pair<unsigned, const TargetRegisterClass*> ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { - // GCC RS6000 Constraint Letters + // GCC ARM Constraint Letters switch (Constraint[0]) { case 'l': - if (Subtarget->isThumb1Only()) + if (Subtarget->isThumb()) return std::make_pair(0U, ARM::tGPRRegisterClass); else return std::make_pair(0U, ARM::GPRRegisterClass); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index da8b373..f67e74a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -127,8 +127,8 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">; def IsARM : Predicate<"!Subtarget->isThumb()">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; -def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">; -def CarryDefIsUsed : Predicate<"N.getNode()->hasAnyUseOfValue(1)">; +def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">; +def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">; // FIXME: Eventually this will be just "hasV6T2Ops". 
def UseMovt : Predicate<"Subtarget->useMovt()">; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 34d7d8f..603ccf5 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -113,7 +113,7 @@ def t_addrmode_s1 : Operand<i32>, def t_addrmode_sp : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { let PrintMethod = "printThumbAddrModeSPOperand"; - let MIOperandInfo = (ops JustSP:$base, i32imm:$offsimm); + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } //===----------------------------------------------------------------------===// @@ -208,9 +208,8 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst", [(brind GPR:$dst)]>, - T1Special<{1,0,?,?}> { - // <Rd> = pc - let Inst{7} = 1; + T1Special<{1,0,1,1}> { + // <Rd> = Inst{7:2-0} = pc let Inst{2-0} = 0b111; } } @@ -342,16 +341,28 @@ def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, T1LdSt<0b100>; +def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, + "ldr", "\t$dst, $addr", + []>, + T1LdSt4Imm<{1,?,?}>; def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>, T1LdSt<0b110>; +def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, + "ldrb", "\t$dst, $addr", + []>, + T1LdSt1Imm<{1,?,?}>; def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>, T1LdSt<0b101>; +def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, + "ldrh", "\t$dst, $addr", + []>, + T1LdSt2Imm<{1,?,?}>; let AddedComplexity = 10 in def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, @@ -397,16 +408,28 @@ def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, "str", "\t$src, $addr", [(store tGPR:$src, t_addrmode_s4:$addr)]>, T1LdSt<0b000>; +def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, + "str", "\t$src, $addr", + []>, + T1LdSt4Imm<{0,?,?}>; def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, "strb", "\t$src, $addr", [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>, T1LdSt<0b010>; +def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, + "strb", "\t$src, $addr", + []>, + T1LdSt1Imm<{0,?,?}>; def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, "strh", "\t$src, $addr", [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>, T1LdSt<0b001>; +def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, + "strh", "\t$src, $addr", + []>, + T1LdSt2Imm<{0,?,?}>; def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, "str", "\t$src, $addr", @@ -748,7 +771,7 @@ let usesCustomInserter = 1 in // Expanded after instruction selection. // 16-bit movcc in IT blocks for Thumb2. 
def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, - T1Special<{1,0,?,?}>; + T1Special<{1,0,1,1}>; def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 6f20ed4..769df7e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -360,8 +360,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; - let Inst{24} = 1; let Inst{26-25} = 0b01; + let Inst{24} = 1; let Inst{23-21} = op23_21; let Inst{20} = 0; // The S bit. } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b13f98a..b78b95b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -740,6 +740,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, /// isMemoryOp - Returns true if instruction is a memory operations (that this /// pass is capable of operating on). static bool isMemoryOp(const MachineInstr *MI) { + if (MI->hasOneMemOperand()) { + const MachineMemOperand *MMO = *MI->memoperands_begin(); + + // Don't touch volatile memory accesses - we may be changing their order. + if (MMO->isVolatile()) + return false; + + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not. + if (MMO->getAlignment() < 4) + return false; + } + int Opcode = MI->getOpcode(); switch (Opcode) { default: break; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 9fbde81..d393e8d 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -367,19 +367,6 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; -// Just the stack pointer (for tSTRspi and friends). -def JustSP : RegisterClass<"ARM", [i32], 32, [SP]> { - let MethodProtos = [{ - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - JustSPClass::iterator - JustSPClass::allocation_order_end(const MachineFunction &MF) const { - return allocation_order_begin(MF); - } - }]; -} - //===----------------------------------------------------------------------===// // Subregister Set Definitions... now that we have all of the pieces, define the // sub registers for each register. 
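(An editorial note on the ARMLoadStoreOptimizer hunk above: the two early-outs added to isMemoryOp are the functional heart of that change. Merging several ldr/str into one ldm/stm can reorder the accesses and removes the option of byte-wise kernel emulation, so volatile and under-aligned operands must be rejected before the opcode switch runs. A minimal sketch of just that guard, using only the MachineMemOperand accessors visible in the hunk — the function name here is made up, and the real pass continues into the opcode switch:

static bool isSafeToMerge(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();
    // Merging may reorder accesses, which is illegal for volatile ones.
    if (MMO->isVolatile())
      return false;
    // Some kernels trap-and-emulate unaligned ldr/str, but not ldm/stm,
    // so a merged access could fault where the originals ran fine.
    if (MMO->getAlignment() < 4)
      return false;
  }
  return true;
}

)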
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ed4667b..132738e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmLexer.h" #include "llvm/MC/MCAsmParser.h" +#include "llvm/MC/MCParsedAsmOperand.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -78,7 +79,7 @@ private: /// @name Auto-generated Match Functions /// { - bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, + bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); /// MatchRegisterName - Match the given string to a register name and return @@ -94,14 +95,15 @@ public: ARMAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; /// ARMOperand - Instances of this class represent a parsed ARM machine /// instruction. -struct ARMOperand { +struct ARMOperand : public MCParsedAsmOperand { enum { Token, Register, @@ -515,9 +517,10 @@ int ARMAsmParser::MatchRegisterName(const StringRef &Name) { } /// A hack to allow some testing, to be replaced by a real table gen version. -bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, - MCInst &Inst) { - struct ARMOperand Op0 = Operands[0]; +bool ARMAsmParser:: +MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCInst &Inst) { + ARMOperand &Op0 = *(ARMOperand*)Operands[0]; assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); const StringRef &Mnemonic = Op0.getToken(); if (Mnemonic == "add" || @@ -578,33 +581,27 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) { } /// Parse an arm instruction mnemonic followed by its operands. -bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { - SmallVector<ARMOperand, 7> Operands; - - Operands.push_back(ARMOperand::CreateToken(Name)); +bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + Operands.push_back(new ARMOperand(ARMOperand::CreateToken(Name))); SMLoc Loc = getLexer().getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - Operands.push_back(ARMOperand()); - if (ParseOperand(Operands.back())) - return true; + ARMOperand Op; + if (ParseOperand(Op)) return true; + Operands.push_back(new ARMOperand(Op)); while (getLexer().is(AsmToken::Comma)) { getLexer().Lex(); // Eat the comma. // Parse and remember the operand. 
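(A note on the parser rework in this file: ParseInstruction no longer fills an MCInst itself; it now builds a generic SmallVectorImpl<MCParsedAsmOperand*>, which is why ARMOperand gains the MCParsedAsmOperand base class and why each parsed operand is copied onto the heap. The per-operand step that the loop below repeats is, schematically:

  ARMOperand Op;
  if (ParseOperand(Op)) return true;       // propagate the parse error
  Operands.push_back(new ARMOperand(Op));  // caller owns the heap copies

These are the hunk's own two statements with comments added, not new behavior.)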
- Operands.push_back(ARMOperand()); - if (ParseOperand(Operands.back())) - return true; + if (ParseOperand(Op)) return true; + Operands.push_back(new ARMOperand(Op)); } } - if (!MatchInstruction(Operands, Inst)) - return false; - - Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented"); - return true; + return false; } /// ParseDirective parses the arm specific directives diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 931d8df..2d13533 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -175,16 +175,16 @@ namespace { printDataDirective(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); - std::string Name; + SmallString<128> TmpNameStr; if (ACPV->isLSDA()) { - SmallString<16> LSDAName; - raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + raw_svector_ostream(TmpNameStr) << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); - Name = LSDAName.str(); + O << TmpNameStr.str(); } else if (ACPV->isBlockAddress()) { - Name = GetBlockAddressSymbol(ACPV->getBlockAddress())->getName(); + O << GetBlockAddressSymbol(ACPV->getBlockAddress())->getName(); } else if (ACPV->isGlobalValue()) { + std::string Name; GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); @@ -201,16 +201,16 @@ namespace { GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) : MMIMachO.getGVStubEntry(Sym); if (StubSym == 0) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TmpNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TmpNameStr.str()); } } + O << Name; } else { assert(ACPV->isExtSymbol() && "unrecognized constant pool value"); - Name = Mang->makeNameProper(ACPV->getSymbol()); + Mang->getNameWithPrefix(TmpNameStr, ACPV->getSymbol()); + OutContext.GetOrCreateSymbol(TmpNameStr.str())->print(O, MAI); } - O << Name; if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; if (ACPV->getPCAdjustment() != 0) { @@ -392,9 +392,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, } case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - std::string Name = Mang->makeNameProper(MO.getSymbolName()); - - O << Name; + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, MO.getSymbolName()); + OutContext.GetOrCreateSymbol(NameStr.str())->print(O, MAI); + if (isCallOp && Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) O << "(PLT)"; diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index 5b0a89d..eaefef9 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -157,7 +157,7 @@ namespace { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
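(The Alpha changes that follow are the clearest example of the mechanical API migration running through this whole commit: every ISel Select entry point switches from taking an SDValue to taking the SDNode* directly. A schematic before/after of the recurring rewrites, using a hypothetical target name — this summarizes the pattern, it is not code from the commit:

// Before: the SDValue wrapper was unwrapped on entry.
SDNode *MyDAGToDAGISel::Select(SDValue Op) {
  SDNode *N = Op.getNode();
  EVT VT = Op.getValueType();          // type of result 0, implicitly
  SDValue LHS = Op.getOperand(0);
  return SelectCode(Op);
}

// After: the node is passed directly and result numbers are explicit.
SDNode *MyDAGToDAGISel::Select(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  ReplaceUses(SDValue(N, 0), NewVal);  // Op.getValue(0) becomes SDValue(N, 0)
  return SelectCode(N);
}

)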
@@ -202,7 +202,7 @@ private: SDNode *getGlobalBaseReg(); SDNode *getGlobalRetAddr(); - void SelectCALL(SDValue Op); + void SelectCALL(SDNode *Op); }; } @@ -232,8 +232,7 @@ void AlphaDAGToDAGISel::InstructionSelect() { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. -SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { return NULL; // Already selected. } @@ -242,7 +241,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { switch (N->getOpcode()) { default: break; case AlphaISD::CALL: - SelectCALL(Op); + SelectCALL(N); return NULL; case ISD::FrameIndex: { @@ -258,9 +257,9 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { case AlphaISD::DivCall: { SDValue Chain = CurDAG->getEntryNode(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1, SDValue(0,0)); Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2, @@ -287,7 +286,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { if (uval == 0) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Alpha::R31, MVT::i64); - ReplaceUses(Op, Result); + ReplaceUses(SDValue(N, 0), Result); return NULL; } @@ -415,13 +414,12 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { } - return SelectCode(Op); + return SelectCode(N); } -void AlphaDAGToDAGISel::SelectCALL(SDValue Op) { +void AlphaDAGToDAGISel::SelectCALL(SDNode *N) { //TODO: add flag stuff to prevent nondeterministic breakage! - SDNode *N = Op.getNode(); SDValue Chain = N->getOperand(0); SDValue Addr = N->getOperand(1); SDValue InFlag = N->getOperand(N->getNumOperands() - 1); @@ -442,8 +440,8 @@ void AlphaDAGToDAGISel::SelectCALL(SDValue Op) { } InFlag = Chain.getValue(1); - ReplaceUses(Op.getValue(0), Chain); - ReplaceUses(Op.getValue(1), InFlag); + ReplaceUses(SDValue(N, 0), Chain); + ReplaceUses(SDValue(N, 1), InFlag); } diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp index 917f7f5..0bd94d4 100644 --- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp +++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp @@ -25,12 +25,14 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; @@ -179,9 +181,12 @@ void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << Mang->getMangledName(MO.getGlobal()); printOffset(MO.getOffset()); break; - case MachineOperand::MO_ExternalSymbol: - O << Mang->makeNameProper(MO.getSymbolName()); + case MachineOperand::MO_ExternalSymbol: { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, MO.getSymbolName()); + OutContext.GetOrCreateSymbol(NameStr.str())->print(O, MAI); break; + } case MachineOperand::MO_ConstantPoolIndex: O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); diff --git
a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 2217af4..e1b6008 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -51,8 +51,8 @@ namespace { #include "BlackfinGenDAGISel.inc" private: - SDNode *Select(SDValue Op); - bool SelectADDRspii(SDValue Op, SDValue Addr, + SDNode *Select(SDNode *N); + bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); // Walk the DAG after instruction selection, fixing register class issues. @@ -82,8 +82,7 @@ void BlackfinDAGToDAGISel::InstructionSelect() { FixRegisterClasses(*CurDAG); } -SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) return NULL; // Already selected. @@ -99,10 +98,10 @@ SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) { } } - return SelectCode(Op); + return SelectCode(N); } -bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Op, +bool BlackfinDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 1ab3c0a..0fd975c 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -25,6 +25,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/InlineAsm.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantsScanner.h" #include "llvm/Analysis/FindUsedTypes.h" @@ -34,6 +35,7 @@ #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/Transforms/Scalar.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CallSite.h" @@ -341,6 +343,15 @@ namespace { char CWriter::ID = 0; + +static std::string Mangle(const std::string &S) { + std::string Result; + raw_string_ostream OS(Result); + MCSymbol::printMangledName(S, OS, 0); + return OS.str(); +} + + /// This method inserts names for any unnamed structure types that are used by /// the program, and removes names from structure types that are not used by the /// program. @@ -1431,8 +1442,11 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { std::string CWriter::GetValueName(const Value *Operand) { // Mangle globals with the standard mangler interface for LLC compatibility. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) - return Mang->getMangledName(GV); + if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) { + SmallString<128> Str; + Mang->getNameWithPrefix(Str, GV, false); + return Mangle(Str.str().str()); + } std::string Name = Operand->getName(); @@ -1857,7 +1871,6 @@ bool CWriter::doInitialization(Module &M) { // Ensure that all structure types have names... Mang = new Mangler(M); - Mang->markCharUnacceptable('.'); // Keep track of which functions are static ctors/dtors so they can have // an attribute added to their prototypes. @@ -2210,7 +2223,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // Print out forward declarations for structure types before anything else! 
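(A note on the static Mangle() helper added to CBackend above: it routes a raw name through MCSymbol::printMangledName instead of the old Mangler::makeNameProper, which is presumably also why doInitialization no longer needs markCharUnacceptable('.'). The struct-name and typedef loops just below feed "l_"-prefixed type names through it. Restated with comments — the code is the hunk's own, only the sample name is hypothetical:

static std::string Mangle(const std::string &S) {
  std::string Result;
  raw_string_ostream OS(Result);
  // Quote characters the C backend cannot emit verbatim in an identifier.
  MCSymbol::printMangledName(S, OS, 0);
  return OS.str();
}

// e.g. a source type named "my.struct" would be printed as
//   "struct " + Mangle("l_my.struct")

)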
Out << "/* Structure forward decls */\n"; for (; I != End; ++I) { - std::string Name = "struct l_" + Mang->makeNameProper(I->first); + std::string Name = "struct " + Mangle("l_"+I->first); Out << Name << ";\n"; TypeNames.insert(std::make_pair(I->second, Name)); } @@ -2221,7 +2234,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // for struct or opaque types. Out << "/* Typedefs */\n"; for (I = TST.begin(); I != End; ++I) { - std::string Name = "l_" + Mang->makeNameProper(I->first); + std::string Name = Mangle("l_"+I->first); Out << "typedef "; printType(Out, I->second, false, Name); Out << ";\n"; @@ -2921,7 +2934,6 @@ void CWriter::lowerIntrinsics(Function &F) { case Intrinsic::setjmp: case Intrinsic::longjmp: case Intrinsic::prefetch: - case Intrinsic::dbg_stoppoint: case Intrinsic::powi: case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse_cmp_ps: @@ -3178,20 +3190,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << "0; *((void**)&" << GetValueName(&I) << ") = __builtin_stack_save()"; return true; - case Intrinsic::dbg_stoppoint: { - // If we use writeOperand directly we get a "u" suffix which is rejected - // by gcc. - DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); - std::string dir; - GetConstantStringInfo(SPI.getDirectory(), dir); - std::string file; - GetConstantStringInfo(SPI.getFileName(), file); - Out << "\n#line " - << SPI.getLine() - << " \"" - << dir << '/' << file << "\"\n"; - return true; - } case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse_cmp_ps: case Intrinsic::x86_sse2_cmp_sd: diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index c69a751..80693e1 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -277,10 +277,9 @@ namespace { return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); } - SDNode *emitBuildVector(SDValue build_vec) { - EVT vecVT = build_vec.getValueType(); + SDNode *emitBuildVector(SDNode *bvNode) { + EVT vecVT = bvNode->getValueType(0); EVT eltVT = vecVT.getVectorElementType(); - SDNode *bvNode = build_vec.getNode(); DebugLoc dl = bvNode->getDebugLoc(); // Check to see if this vector can be represented as a CellSPU immediate @@ -296,13 +295,13 @@ namespace { ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) - return Select(build_vec); + return Select(bvNode); // No, need to emit a constant pool spill: std::vector<Constant*> CV; - for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i)); + for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { + ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i)); CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue())); } @@ -312,49 +311,49 @@ namespace { SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, SPUtli.getSPUTargetMachine()); - return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl, + return SelectCode(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, PseudoSourceValue::getConstantPool(), 0, - false, Alignment)); + false, Alignment).getNode()); } /// Select - Convert the specified operand from a target-independent to a /// target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); //! 
Emit the instruction sequence for i64 shl - SDNode *SelectSHLi64(SDValue &Op, EVT OpVT); + SDNode *SelectSHLi64(SDNode *N, EVT OpVT); //! Emit the instruction sequence for i64 srl - SDNode *SelectSRLi64(SDValue &Op, EVT OpVT); + SDNode *SelectSRLi64(SDNode *N, EVT OpVT); //! Emit the instruction sequence for i64 sra - SDNode *SelectSRAi64(SDValue &Op, EVT OpVT); + SDNode *SelectSRAi64(SDNode *N, EVT OpVT); //! Emit the necessary sequence for loading i64 constants: - SDNode *SelectI64Constant(SDValue &Op, EVT OpVT, DebugLoc dl); + SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl); //! Alternate instruction emit sequence for loading i64 constants SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl); //! Returns true if the address N is an A-form (local store) address - bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); //! D-form address predicate - bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); /// Alternate D-form address using i7 offset predicate - bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, + bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base); /// D-form address selection workhorse - bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp, + bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base, int minOffset, int maxOffset); //! Address predicate if N can be expressed as an indexed [r+r] operation. - bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for @@ -366,13 +365,13 @@ namespace { switch (ConstraintCode) { default: return true; case 'm': // memory - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) - SelectXFormAddr(Op, Op, Op0, Op1); + if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) + && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) + SelectXFormAddr(Op.getNode(), Op, Op0, Op1); break; case 'o': // offsetable - if (!SelectDFormAddr(Op, Op, Op0, Op1) - && !SelectAFormAddr(Op, Op, Op0, Op1)) { + if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) + && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) { Op0 = Op; Op1 = getSmallIPtrImm(0); } @@ -429,7 +428,7 @@ SPUDAGToDAGISel::InstructionSelect() \arg Index The base address index */ bool -SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { // These match the addr256k operand type: EVT OffsVT = MVT::i16; @@ -479,7 +478,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, } bool -SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, +SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { const int minDForm2Offset = -(1 << 7); const int maxDForm2Offset = (1 << 7) - 1; @@ -500,7 +499,7 @@ SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp, to non-empty SDValue instances. 
*/ bool -SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { return DFormAddressPredicate(Op, N, Base, Index, SPUFrameInfo::minFrameOffset(), @@ -508,7 +507,7 @@ SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, } bool -SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index, int minOffset, int maxOffset) { unsigned Opc = N.getOpcode(); @@ -618,7 +617,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, Index = N; return true; } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) { - unsigned OpOpc = Op.getOpcode(); + unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { // Direct load/store without getelementptr @@ -630,7 +629,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, else Addr = N; // Register - Offs = ((OpOpc == ISD::STORE) ? Op.getOperand(3) : Op.getOperand(2)); + Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2)); if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) { if (Offs.getOpcode() == ISD::UNDEF) @@ -667,7 +666,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, (r)(r) X-form address. */ bool -SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, +SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { if (!SelectAFormAddr(Op, N, Base, Index) && !SelectDFormAddr(Op, N, Base, Index)) { @@ -685,12 +684,11 @@ SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, /*! */ SDNode * -SPUDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SPUDAGToDAGISel::Select(SDNode *N) { unsigned Opc = N->getOpcode(); int n_ops = -1; unsigned NewOpc; - EVT OpVT = Op.getValueType(); + EVT OpVT = N->getValueType(0); SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); @@ -700,8 +698,8 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (Opc == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); - SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); + SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0)); if (FI < 128) { NewOpc = SPU::AIr32; @@ -710,9 +708,9 @@ SPUDAGToDAGISel::Select(SDValue Op) { n_ops = 2; } else { NewOpc = SPU::Ar32; - Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType()); + Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0)); Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, - Op.getValueType(), TFI, Imm0), + N->getValueType(0), TFI, Imm0), 0); n_ops = 2; } @@ -720,10 +718,10 @@ SPUDAGToDAGISel::Select(SDValue Op) { // Catch the i64 constants that end up here. Note: The backend doesn't // attempt to legalize the constant (it's useless because DAGCombiner // will insert 64-bit constants and we can't stop it). 
- return SelectI64Constant(Op, OpVT, Op.getDebugLoc()); + return SelectI64Constant(N, OpVT, N->getDebugLoc()); } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { - SDValue Op0 = Op.getOperand(0); + SDValue Op0 = N->getOperand(0); EVT Op0VT = Op0.getValueType(); EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), Op0VT, (128 / Op0VT.getSizeInBits())); @@ -760,9 +758,10 @@ SPUDAGToDAGISel::Select(SDValue Op) { break; } - SDNode *shufMaskLoad = emitBuildVector(shufMask); + SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); SDNode *PromoteScalar = - SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0)); + SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, + Op0VecVT, Op0).getNode()); SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, @@ -773,32 +772,32 @@ SPUDAGToDAGISel::Select(SDValue Op) { // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we // re-use it in the VEC2PREFSLOT selection without needing to explicitly // call SelectCode (it's already done for us.) - SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle)); + SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode()); return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, - zextShuffle)); + zextShuffle).getNode()); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0)).getNode()); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0)).getNode()); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl)); + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, - Op.getOperand(0), Op.getOperand(1), - SDValue(CGLoad, 0))); + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0)).getNode()); } else if (Opc == ISD::TRUNCATE) { - SDValue Op0 = Op.getOperand(0); + SDValue Op0 = N->getOperand(0); if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) && OpVT == MVT::i32 && Op0.getValueType() == MVT::i64) { @@ -834,22 +833,22 @@ SPUDAGToDAGISel::Select(SDValue Op) { } } else if (Opc == ISD::SHL) { if (OpVT == MVT::i64) { - return SelectSHLi64(Op, OpVT); + return SelectSHLi64(N, OpVT); } } else if (Opc == ISD::SRL) { if (OpVT == MVT::i64) { - return SelectSRLi64(Op, OpVT); + return SelectSRLi64(N, OpVT); } } else if (Opc == ISD::SRA) { if (OpVT == MVT::i64) { - return SelectSRAi64(Op, OpVT); + return SelectSRAi64(N, OpVT); } } else if (Opc == ISD::FNEG && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); // Check if the pattern is a special form of DFNMS: // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) - SDValue Op0 = Op.getOperand(0); + SDValue Op0 = 
N->getOperand(0); if (Op0.getOpcode() == ISD::FSUB) { SDValue Op00 = Op0.getOperand(0); if (Op00.getOpcode() == ISD::FMUL) { @@ -869,28 +868,28 @@ SPUDAGToDAGISel::Select(SDValue Op) { unsigned Opc = SPU::XORfneg64; if (OpVT == MVT::f64) { - signMask = SelectI64Constant(negConst, MVT::i64, dl); + signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl); } else if (OpVT == MVT::v2f64) { Opc = SPU::XORfnegvec; signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, - negConst, negConst)); + negConst, negConst).getNode()); } return CurDAG->getMachineNode(Opc, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + N->getOperand(0), SDValue(signMask, 0)); } else if (Opc == ISD::FABS) { if (OpVT == MVT::f64) { SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + N->getOperand(0), SDValue(signMask, 0)); } else if (OpVT == MVT::v2f64) { SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, absConst, absConst); - SDNode *signMask = emitBuildVector(absVec); + SDNode *signMask = emitBuildVector(absVec.getNode()); return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + N->getOperand(0), SDValue(signMask, 0)); } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT @@ -925,7 +924,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { // SPUInstrInfo catches the following patterns: // (SPUindirect (SPUhi ...), (SPUlo ...)) // (SPUindirect $sp, imm) - EVT VT = Op.getValueType(); + EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); RegisterSDNode *RN; @@ -952,7 +951,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { else return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops); } else - return SelectCode(Op); + return SelectCode(N); } /*! 
@@ -968,15 +967,15 @@ SPUDAGToDAGISel::Select(SDValue Op) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) { - SDValue Op0 = Op.getOperand(0); +SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { + SDValue Op0 = N->getOperand(0); EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = Op.getOperand(1); + SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; SDValue SelMaskVal; - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); @@ -1034,14 +1033,14 @@ SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) { - SDValue Op0 = Op.getOperand(0); +SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { + SDValue Op0 = N->getOperand(0); EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = Op.getOperand(1); + SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); SDNode *VecOp0, *Shift = 0; - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); @@ -1101,16 +1100,16 @@ SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) { +SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { // Promote Op0 to vector EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = Op.getOperand(1); + SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); SDNode *VecOp0 = - CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0)); + CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0)); SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); SDNode *SignRot = @@ -1170,9 +1169,9 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) { /*! Do the necessary magic necessary to load a i64 constant */ -SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, EVT OpVT, +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl) { - ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode()); + ConstantSDNode *CN = cast<ConstantSDNode>(N); return SelectI64Constant(CN->getZExtValue(), OpVT, dl); } @@ -1192,7 +1191,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, ReplaceUses(i64vec, Op0); return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(Op0), 0)); + SDValue(emitBuildVector(Op0.getNode()), 0)); } else if (i64vec.getOpcode() == SPUISD::SHUFB) { SDValue lhs = i64vec.getOperand(0); SDValue rhs = i64vec.getOperand(1); @@ -1205,7 +1204,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, SDNode *lhsNode = (lhs.getNode()->isMachineOpcode() ? 
lhs.getNode() - : emitBuildVector(lhs)); + : emitBuildVector(lhs.getNode())); if (rhs.getOpcode() == ISD::BIT_CONVERT) { ReplaceUses(rhs, rhs.getOperand(0)); @@ -1214,7 +1213,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, SDNode *rhsNode = (rhs.getNode()->isMachineOpcode() ? rhs.getNode() - : emitBuildVector(rhs)); + : emitBuildVector(rhs.getNode())); if (shufmask.getOpcode() == ISD::BIT_CONVERT) { ReplaceUses(shufmask, shufmask.getOperand(0)); @@ -1223,18 +1222,18 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode() ? shufmask.getNode() - : emitBuildVector(shufmask)); + : emitBuildVector(shufmask.getNode())); SDNode *shufNode = Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, SDValue(lhsNode, 0), SDValue(rhsNode, 0), - SDValue(shufMaskNode, 0))); + SDValue(shufMaskNode, 0)).getNode()); return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(shufNode, 0)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(i64vec), 0)); + SDValue(emitBuildVector(i64vec.getNode()), 0)); } else { llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" "condition"); diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4d40769..4eec757 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -146,12 +146,12 @@ namespace { private: DenseMap<SDNode*, SDNode*> RMWStores; void PreprocessForRMW(); - SDNode *Select(SDValue Op); - SDNode *SelectIndexedLoad(SDValue Op); - SDNode *SelectIndexedBinOp(SDValue Op, SDValue N1, SDValue N2, + SDNode *Select(SDNode *N); + SDNode *SelectIndexedLoad(SDNode *Op); + SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16); - bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp); + bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp); #ifndef NDEBUG unsigned Indent; @@ -283,7 +283,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. 
-bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, +bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Disp) { MSP430ISelAddressMode AM; @@ -326,7 +326,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, switch (ConstraintCode) { default: return true; case 'm': // memory - if (!SelectAddr(Op, Op, Op0, Op1)) + if (!SelectAddr(Op.getNode(), Op, Op0, Op1)) return true; break; } @@ -627,8 +627,8 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) { return true; } -SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) { - LoadSDNode *LD = cast<LoadSDNode>(Op); +SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); if (!isValidIndexedLoad(LD)) return NULL; @@ -646,17 +646,17 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) { return NULL; } - return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), VT, MVT::i16, MVT::Other, LD->getBasePtr(), LD->getChain()); } -SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op, +SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalAndProfitableToFold(N1.getNode(), Op.getNode(), Op.getNode())) { + IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; @@ -667,7 +667,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op, MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand(); SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() }; SDNode *ResNode = - CurDAG->SelectNodeTo(Op.getNode(), Opc, + CurDAG->SelectNodeTo(Op, Opc, VT, MVT::i16, MVT::Other, Ops0, 3); cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1); @@ -707,9 +707,8 @@ void MSP430DAGToDAGISel::InstructionSelect() { RMWStores.clear(); } -SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { - SDNode *Node = Op.getNode(); - DebugLoc dl = Op.getDebugLoc(); +SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { + DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected DEBUG(errs().indent(Indent) << "Selecting: "); @@ -730,7 +729,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { switch (Node->getOpcode()) { default: break; case ISD::FrameIndex: { - assert(Op.getValueType() == MVT::i16); + assert(Node->getValueType(0) == MVT::i16); int FI = cast<FrameIndexSDNode>(Node)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16); if (Node->hasOneUse()) @@ -740,18 +739,18 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { TFI, CurDAG->getTargetConstant(0, MVT::i16)); } case ISD::LOAD: - if (SDNode *ResNode = SelectIndexedLoad(Op)) + if (SDNode *ResNode = SelectIndexedLoad(Node)) return ResNode; // Other cases are autogenerated. 
break; case ISD::ADD: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) return ResNode; @@ -759,8 +758,8 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::SUB: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::SUB8rm_POST, MSP430::SUB16rm_POST)) return ResNode; @@ -768,12 +767,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::AND: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::AND8rm_POST, MSP430::AND16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::AND8rm_POST, MSP430::AND16rm_POST)) return ResNode; @@ -781,12 +780,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::OR: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::OR8rm_POST, MSP430::OR16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::OR8rm_POST, MSP430::OR16rm_POST)) return ResNode; @@ -794,12 +793,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { break; case ISD::XOR: if (SDNode *ResNode = - SelectIndexedBinOp(Op, - Op.getOperand(0), Op.getOperand(1), + SelectIndexedBinOp(Node, + Node->getOperand(0), Node->getOperand(1), MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) return ResNode; else if (SDNode *ResNode = - SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + SelectIndexedBinOp(Node, Node->getOperand(1), Node->getOperand(0), MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) return ResNode; @@ -808,11 +807,11 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { } // Select the default instruction - SDNode *ResNode = SelectCode(Op); + SDNode *ResNode = SelectCode(Node); DEBUG(errs() << std::string(Indent-2, ' ') << "=> "); - if (ResNode == NULL || ResNode == Op.getNode()) - DEBUG(Op.getNode()->dump(CurDAG)); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5fe9b20..d3dce4b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -660,16 +660,16 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: TCC = MSP430CC::COND_E; // aka COND_Z - // Minor optimization: if RHS is a constant, swap operands, then the + // Minor optimization: if LHS is a constant, swap operands, then the // constant can be folded into comparison. 
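(Worth pausing on the MSP430ISelLowering fix here: the old code tested RHS for a constant and then swapped, which moved the constant away from the position where it can fold into the compare; the corrected test looks at LHS. Reduced to its core — and the swap is only done for EQ/NE, which compare symmetrically, so operand order cannot change the result:

  case ISD::SETEQ:
  case ISD::SETNE:
    // Keep any constant operand on the RHS so it can fold into the
    // immediate form of cmp; EQ/NE are unaffected by operand order.
    if (LHS.getOpcode() == ISD::Constant)
      std::swap(LHS, RHS);
    break;

The diff lines that follow show this same one-character-of-logic change applied to both cases.)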
- if (RHS.getOpcode() == ISD::Constant) + if (LHS.getOpcode() == ISD::Constant) std::swap(LHS, RHS); break; case ISD::SETNE: TCC = MSP430CC::COND_NE; // aka COND_NZ - // Minor optimization: if RHS is a constant, swap operands, then the + // Minor optimization: if LHS is a constant, swap operands, then the // constant can be folded into comparison. - if (RHS.getOpcode() == ISD::Constant) + if (LHS.getOpcode() == ISD::Constant) std::swap(LHS, RHS); break; case ISD::SETULE: @@ -1014,8 +1014,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, // BB: // cmp 0, N // je RemBB - BuildMI(BB, dl, TII.get(MSP430::CMP8ir)) - .addImm(0).addReg(ShiftAmtSrcReg); + BuildMI(BB, dl, TII.get(MSP430::CMP8ri)) + .addReg(ShiftAmtSrcReg).addImm(0); BuildMI(BB, dl, TII.get(MSP430::JCC)) .addMBB(RemBB) .addImm(MSP430CC::COND_E); @@ -1045,6 +1045,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, .addReg(SrcReg).addMBB(BB) .addReg(ShiftReg2).addMBB(LoopBB); + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return RemBB; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index d67ba90..022d171 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -819,38 +819,40 @@ def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), // Integer comparisons let Defs = [SRW] in { def CMP8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2), - "cmp.b\t{$src1, $src2}", + "cmp.b\t{$src2, $src1}", [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>; def CMP16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2), - "cmp.w\t{$src1, $src2}", + "cmp.w\t{$src2, $src1}", [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>; -def CMP8ir : Pseudo<(outs), (ins i8imm:$src1, GR8:$src2), - "cmp.b\t{$src1, $src2}", - [(MSP430cmp imm:$src1, GR8:$src2), (implicit SRW)]>; -def CMP16ir : Pseudo<(outs), (ins i16imm:$src1, GR16:$src2), - "cmp.w\t{$src1, $src2}", - [(MSP430cmp imm:$src1, GR16:$src2), (implicit SRW)]>; - -def CMP8im : Pseudo<(outs), (ins i8imm:$src1, memsrc:$src2), - "cmp.b\t{$src1, $src2}", - [(MSP430cmp (i8 imm:$src1), (load addr:$src2)), (implicit SRW)]>; -def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2), - "cmp.w\t{$src1, $src2}", - [(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>; +def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2), + "cmp.b\t{$src2, $src1}", + [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>; +def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2), + "cmp.w\t{$src2, $src1}", + [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>; + +def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2), + "cmp.b\t{$src2, $src1}", + [(MSP430cmp (load addr:$src1), + (i8 imm:$src2)), (implicit SRW)]>; +def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2), + "cmp.w\t{$src2, $src1}", + [(MSP430cmp (load addr:$src1), + (i16 imm:$src2)), (implicit SRW)]>; def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2), - "cmp.b\t{$src1, $src2}", + "cmp.b\t{$src2, $src1}", [(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>; def CMP16rm : Pseudo<(outs), (ins GR16:$src1, memsrc:$src2), - "cmp.w\t{$src1, $src2}", + "cmp.w\t{$src2, $src1}", [(MSP430cmp GR16:$src1, (load addr:$src2)), (implicit SRW)]>; def CMP8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2), - "cmp.b\t{$src1, $src2}", + "cmp.b\t{$src2, $src1}", [(MSP430cmp (load addr:$src1), GR8:$src2), (implicit SRW)]>; def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), - "cmp.w\t{$src1, $src2}", + "cmp.w\t{$src2, 
$src1}", [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index ede111d..a53e918 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -84,14 +84,14 @@ private: } SDNode *getGlobalBaseReg(); - SDNode *Select(SDValue N); + SDNode *Select(SDNode *N); // Complex Pattern. - bool SelectAddr(SDValue Op, SDValue N, + bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - SDNode *SelectLoadFp64(SDValue N); - SDNode *SelectStoreFp64(SDValue N); + SDNode *SelectLoadFp64(SDNode *N); + SDNode *SelectStoreFp64(SDNode *N); // getI32Imm - Return a target constant with the specified // value, of type i32. @@ -132,7 +132,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: -SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) +SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { @@ -199,19 +199,19 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) return true; } -SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { +SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { MVT::SimpleValueType NVT = - N.getNode()->getValueType(0).getSimpleVT().SimpleTy; + N->getValueType(0).getSimpleVT().SimpleTy; if (!Subtarget.isMips1() || NVT != MVT::f64) return NULL; - if (!Predicate_unindexedload(N.getNode()) || - !Predicate_load(N.getNode())) + if (!Predicate_unindexedload(N) || + !Predicate_load(N)) return NULL; - SDValue Chain = N.getOperand(0); - SDValue N1 = N.getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); SDValue Offset0, Offset1, Base; if (!SelectAddr(N, N1, Offset0, Base) || @@ -220,7 +220,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); - DebugLoc dl = N.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); // The second load should start after for 4 bytes. 
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) @@ -255,27 +255,27 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, MVT::f64, I0, SDValue(LD1, 0)); - ReplaceUses(N, I1); - ReplaceUses(N.getValue(1), Chain); + ReplaceUses(SDValue(N, 0), I1); + ReplaceUses(SDValue(N, 1), Chain); cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1); cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1); return I1.getNode(); } -SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { +SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { if (!Subtarget.isMips1() || - N.getOperand(1).getValueType() != MVT::f64) + N->getOperand(1).getValueType() != MVT::f64) return NULL; - SDValue Chain = N.getOperand(0); + SDValue Chain = N->getOperand(0); - if (!Predicate_unindexedstore(N.getNode()) || - !Predicate_store(N.getNode())) + if (!Predicate_unindexedstore(N) || + !Predicate_store(N)) return NULL; - SDValue N1 = N.getOperand(1); - SDValue N2 = N.getOperand(2); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); SDValue Offset0, Offset1, Base; if (!SelectAddr(N, N2, Offset0, Base) || @@ -285,7 +285,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); - DebugLoc dl = N.getDebugLoc(); + DebugLoc dl = N->getDebugLoc(); // Get the even and odd part from the f64 register SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD, @@ -318,14 +318,13 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { MVT::Other, Ops1, 4), 0); cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); - ReplaceUses(N.getValue(0), Chain); + ReplaceUses(SDValue(N, 0), Chain); return Chain.getNode(); } /// Select instructions not customized! 
Used for /// expanded, promoted and normal instructions -SDNode* MipsDAGToDAGISel::Select(SDValue N) { - SDNode *Node = N.getNode(); +SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); @@ -379,7 +378,7 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, SDValue(Carry,0), RHS); - return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag, + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Flag, LHS, SDValue(AddCarry,0)); } @@ -405,11 +404,11 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { InFlag = SDValue(Lo,1); SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); - if (!N.getValue(0).use_empty()) - ReplaceUses(N.getValue(0), SDValue(Lo,0)); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(Lo,0)); - if (!N.getValue(1).use_empty()) - ReplaceUses(N.getValue(1), SDValue(Hi,0)); + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(Hi,0)); return NULL; } @@ -460,23 +459,23 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { return getGlobalBaseReg(); case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); - if (N.getValueType() == MVT::f64 && CN->isExactlyValue(+0.0)) { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); - ReplaceUses(N, Zero); + ReplaceUses(SDValue(Node, 0), Zero); return Zero.getNode(); } break; } case ISD::LOAD: - if (SDNode *ResNode = SelectLoadFp64(N)) + if (SDNode *ResNode = SelectLoadFp64(Node)) return ResNode; // Other cases are autogenerated. break; case ISD::STORE: - if (SDNode *ResNode = SelectStoreFp64(N)) + if (SDNode *ResNode = SelectStoreFp64(Node)) return ResNode; // Other cases are autogenerated. break; @@ -523,11 +522,11 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { } // Select the default instruction - SDNode *ResNode = SelectCode(N); + SDNode *ResNode = SelectCode(Node); DEBUG(errs().indent(Indent-2) << "=> "); - if (ResNode == NULL || ResNode == N.getNode()) - DEBUG(N.getNode()->dump(CurDAG)); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index e13e6cd..82197ae 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -36,7 +36,7 @@ void PIC16DAGToDAGISel::InstructionSelect() { /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. -SDNode* PIC16DAGToDAGISel::Select(SDValue N) { +SDNode* PIC16DAGToDAGISel::Select(SDNode *N) { // Select the default instruction. SDNode *ResNode = SelectCode(N); @@ -47,7 +47,7 @@ SDNode* PIC16DAGToDAGISel::Select(SDValue N) { // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. -bool PIC16DAGToDAGISel::SelectDirectAddr(SDValue Op, SDValue N, +bool PIC16DAGToDAGISel::SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address) { // Return true if TGA or ES. 
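// (TGA = ISD::TargetGlobalAddress, ES = ISD::TargetExternalSymbol; both
// already name a fixed symbol, so either can serve directly as the address
// operand, which is what the check below tests for.)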
if (N.getOpcode() == ISD::TargetGlobalAddress diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index d9172f2..813a540 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -52,10 +52,10 @@ private: // Include the pieces autogenerated from the target description. #include "PIC16GenDAGISel.inc" - SDNode *Select(SDValue N); + SDNode *Select(SDNode *N); // Match direct address complex pattern. - bool SelectDirectAddr(SDValue Op, SDValue N, SDValue &Address); + bool SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address); }; diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index aae4607..d505d38 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -49,6 +50,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -57,27 +59,42 @@ namespace { class PPCAsmPrinter : public AsmPrinter { protected: struct FnStubInfo { - std::string Stub, LazyPtr, AnonSymbol; + MCSymbol *Stub, *LazyPtr, *AnonSymbol; - FnStubInfo() {} + FnStubInfo() { + Stub = LazyPtr = AnonSymbol = 0; + } - void Init(const GlobalValue *GV, Mangler *Mang) { + void Init(const GlobalValue *GV, Mangler *Mang, MCContext &Ctx) { // Already initialized. - if (!Stub.empty()) return; - Stub = Mang->getMangledName(GV, "$stub", true); - LazyPtr = Mang->getMangledName(GV, "$lazy_ptr", true); - AnonSymbol = Mang->getMangledName(GV, "$stub$tmp", true); + if (Stub != 0) return; + + // Get the names. + SmallString<128> TmpStr; + Mang->getNameWithPrefix(TmpStr, GV, true); + MakeSymbols(TmpStr, Ctx); } - void Init(const std::string &GV, Mangler *Mang) { - // Already initialized. - if (!Stub.empty()) return; - Stub = Mang->makeNameProper(GV + "$stub", - Mangler::Private); - LazyPtr = Mang->makeNameProper(GV + "$lazy_ptr", - Mangler::Private); - AnonSymbol = Mang->makeNameProper(GV + "$stub$tmp", - Mangler::Private); + void Init(StringRef GVName, Mangler *Mang, MCContext &Ctx) { + assert(!GVName.empty() && "external symbol name shouldn't be empty"); + if (Stub != 0) return; // Already initialized. + // Get the names for the external symbol name. + SmallString<128> TmpStr; + Mang->getNameWithPrefix(TmpStr, GVName, Mangler::Private); + MakeSymbols(TmpStr, Ctx); + } + + void MakeSymbols(SmallString<128> &TmpStr, MCContext &Ctx) { + TmpStr += "$stub"; + Stub = Ctx.GetOrCreateSymbol(TmpStr.str()); + TmpStr.erase(TmpStr.end()-5, TmpStr.end()); // Remove $stub + + TmpStr += "$lazy_ptr"; + LazyPtr = Ctx.GetOrCreateSymbol(TmpStr.str()); + TmpStr.erase(TmpStr.end()-9, TmpStr.end()); // Remove $lazy_ptr + + TmpStr += "$stub$tmp"; + AnonSymbol = Ctx.GetOrCreateSymbol(TmpStr.str()); } }; @@ -224,15 +241,17 @@ namespace { if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. 
FnStubInfo &FnInfo = FnStubs[Mang->getMangledName(GV)]; - FnInfo.Init(GV, Mang); - O << FnInfo.Stub; + FnInfo.Init(GV, Mang, OutContext); + FnInfo.Stub->print(O, MAI); return; } } if (MO.getType() == MachineOperand::MO_ExternalSymbol) { - FnStubInfo &FnInfo =FnStubs[Mang->makeNameProper(MO.getSymbolName())]; - FnInfo.Init(MO.getSymbolName(), Mang); - O << FnInfo.Stub; + SmallString<128> MangledName; + Mang->getNameWithPrefix(MangledName, MO.getSymbolName()); + FnStubInfo &FnInfo = FnStubs[MangledName.str()]; + FnInfo.Init(MO.getSymbolName(), Mang, OutContext); + FnInfo.Stub->print(O, MAI); return; } } @@ -550,50 +569,49 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { processDebugLoc(MI, true); // Check for slwi/srwi mnemonics. + bool useSubstituteMnemonic = false; if (MI->getOpcode() == PPC::RLWINM) { - bool FoundMnemonic = false; unsigned char SH = MI->getOperand(2).getImm(); unsigned char MB = MI->getOperand(3).getImm(); unsigned char ME = MI->getOperand(4).getImm(); if (SH <= 31 && MB == 0 && ME == (31-SH)) { - O << "\tslwi "; FoundMnemonic = true; + O << "\tslwi "; useSubstituteMnemonic = true; } if (SH <= 31 && MB == (32-SH) && ME == 31) { - O << "\tsrwi "; FoundMnemonic = true; + O << "\tsrwi "; useSubstituteMnemonic = true; SH = 32-SH; } - if (FoundMnemonic) { + if (useSubstituteMnemonic) { printOperand(MI, 0); O << ", "; printOperand(MI, 1); - O << ", " << (unsigned int)SH << '\n'; - return; + O << ", " << (unsigned int)SH; } } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) { if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + useSubstituteMnemonic = true; O << "\tmr "; printOperand(MI, 0); O << ", "; printOperand(MI, 1); - O << '\n'; - return; } } else if (MI->getOpcode() == PPC::RLDICR) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char ME = MI->getOperand(3).getImm(); // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH if (63-SH == ME) { + useSubstituteMnemonic = true; O << "\tsldi "; printOperand(MI, 0); O << ", "; printOperand(MI, 1); - O << ", " << (unsigned int)SH << '\n'; - return; + O << ", " << (unsigned int)SH; } } - printInstruction(MI); - + if (!useSubstituteMnemonic) + printInstruction(MI); + if (VerboseAsm) EmitComments(*MI); O << '\n'; @@ -1038,27 +1056,38 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText()); - for (StringMap<FnStubInfo>::iterator I = FnStubs.begin(), E = FnStubs.end(); + for (StringMap<FnStubInfo>::iterator I = FnStubs.begin(), E = FnStubs.end(); I != E; ++I) { OutStreamer.SwitchSection(StubSection); EmitAlignment(4); const FnStubInfo &Info = I->second; - O << Info.Stub << ":\n"; + Info.Stub->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; O << "\tmflr r0\n"; - O << "\tbcl 20,31," << Info.AnonSymbol << '\n'; - O << Info.AnonSymbol << ":\n"; + O << "\tbcl 20,31,"; + Info.AnonSymbol->print(O, MAI); + O << '\n'; + Info.AnonSymbol->print(O, MAI); + O << ":\n"; O << "\tmflr r11\n"; - O << "\taddis r11,r11,ha16(" << Info.LazyPtr << "-" << Info.AnonSymbol; + O << "\taddis r11,r11,ha16("; + Info.LazyPtr->print(O, MAI); + O << '-'; + Info.AnonSymbol->print(O, MAI); O << ")\n"; O << "\tmtlr r0\n"; O << (isPPC64 ? 
"\tldu" : "\tlwzu") << " r12,lo16("; - O << Info.LazyPtr << "-" << Info.AnonSymbol << ")(r11)\n"; + Info.LazyPtr->print(O, MAI); + O << '-'; + Info.AnonSymbol->print(O, MAI); + O << ")(r11)\n"; O << "\tmtctr r12\n"; O << "\tbctr\n"; OutStreamer.SwitchSection(LSPSection); - O << Info.LazyPtr << ":\n"; + Info.LazyPtr->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n"; } @@ -1074,15 +1103,20 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.SwitchSection(StubSection); EmitAlignment(4); const FnStubInfo &Info = I->second; - O << Info.Stub << ":\n"; + Info.Stub->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; - O << "\tlis r11,ha16(" << Info.LazyPtr << ")\n"; + O << "\tlis r11,ha16("; + Info.LazyPtr->print(O, MAI); + O << ")\n"; O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16("; - O << Info.LazyPtr << ")(r11)\n"; + Info.LazyPtr->print(O, MAI); + O << ")(r11)\n"; O << "\tmtctr r12\n"; O << "\tbctr\n"; OutStreamer.SwitchSection(LSPSection); - O << Info.LazyPtr << ":\n"; + Info.LazyPtr->print(O, MAI); + O << ":\n"; O << "\t.indirect_symbol " << I->getKeyData() << '\n'; O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n"; } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index e7334b5..32c1879 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -95,7 +95,7 @@ namespace { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); SDNode *SelectBitfieldInsert(SDNode *N); @@ -105,7 +105,7 @@ namespace { /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. - bool SelectAddrImm(SDValue Op, SDValue N, SDValue &Disp, + bool SelectAddrImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG); } @@ -113,7 +113,7 @@ namespace { /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. Because preinc imms have already been validated, just /// accept it. - bool SelectAddrImmOffs(SDValue Op, SDValue N, SDValue &Out) const { + bool SelectAddrImmOffs(SDNode *Op, SDValue N, SDValue &Out) const { Out = N; return true; } @@ -121,14 +121,14 @@ namespace { /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. - bool SelectAddrIdx(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrIdx(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); } /// SelectAddrIdxOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. - bool SelectAddrIdxOnly(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddrIdxOnly(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } @@ -136,7 +136,7 @@ namespace { /// SelectAddrImmShift - Returns true if the address N can be represented by /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable /// for use by STD and friends. 
- bool SelectAddrImmShift(SDValue Op, SDValue N, SDValue &Disp, + bool SelectAddrImmShift(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } @@ -180,7 +180,7 @@ namespace { #include "PPCGenDAGISel.inc" private: - SDNode *SelectSETCC(SDValue Op); + SDNode *SelectSETCC(SDNode *N); }; } @@ -635,8 +635,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { return 0; } -SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); @@ -756,9 +755,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. -SDNode *PPCDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); - DebugLoc dl = Op.getDebugLoc(); +SDNode *PPCDAGToDAGISel::Select(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); if (N->isMachineOpcode()) return NULL; // Already selected. @@ -841,18 +839,18 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { } case ISD::SETCC: - return SelectSETCC(Op); + return SelectSETCC(N); case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); case ISD::FrameIndex: { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType()); - unsigned Opc = Op.getValueType() == MVT::i32 ? PPC::ADDI : PPC::ADDI8; + SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); + unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI, + return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI, getSmallIPtrImm(0)); - return CurDAG->getMachineNode(Opc, dl, Op.getValueType(), TFI, + return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, getSmallIPtrImm(0)); } @@ -899,7 +897,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { case ISD::LOAD: { // Handle preincrement loads. - LoadSDNode *LD = cast<LoadSDNode>(Op); + LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); // Normal loads are handled by code generated from the .td file. 
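// A pre-increment load writes the effective address back into the base
// register as a second result, e.g. (illustrative) "lwzu r4, 8(r5)" loads
// from r5+8 and also updates r5 to r5+8; only these forms need the custom
// selection here.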
@@ -1092,7 +1090,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { } } - return SelectCode(Op); + return SelectCode(N); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 8fe151a..842f8ee 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -430,9 +430,7 @@ let isCall = 1, PPC970_Unit = 7, F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR,CTR, - CR0,CR1,CR5,CR6,CR7, - CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, - CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in { + CR0,CR1,CR5,CR6,CR7,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_Darwin : IForm<18, 0, 1, @@ -457,9 +455,7 @@ let isCall = 1, PPC970_Unit = 7, F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR,CTR, - CR0,CR1,CR5,CR6,CR7, - CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, - CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in { + CR0,CR1,CR5,CR6,CR7,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_SVR4 : IForm<18, 0, 1, diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index be6e51e..daf4ec6 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -308,6 +308,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, // Rewrite the stub with an unconditional branch to the target, for any users // who took the address of the stub. EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit); + sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4); // Put the address of the target function to call and the address to return to // after calling the target function in a place that is easy to get on the @@ -441,4 +442,5 @@ void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Old, 7*4); } diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp index c87879b..ee6deb5 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -22,6 +22,7 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { if (!is64Bit) Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. AssemblerDialect = 1; // New-Style mnemonics. + SupportsDebugInformation= true; // Debug information. } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index f5e50fc..060d6a5 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -7,6 +7,39 @@ TODO: ===-------------------------------------------------------------------------=== +On PPC64, this: + +long f2 (long x) { return 0xfffffff000000000UL; } +long f3 (long x) { return 0x1ffffffffUL; } + +could compile into: + +_f2: + li r3,-1 + rldicr r3,r3,0,27 + blr +_f3: + li r3,-1 + rldicl r3,r3,0,31 + blr + +we produce: + +_f2: + lis r2, 4095 + ori r2, r2, 65535 + sldi r3, r2, 36 + blr +_f3: + li r2, 1 + sldi r2, r2, 32 + oris r2, r2, 65535 + ori r3, r2, 65535 + blr + + +===-------------------------------------------------------------------------=== + Support 'update' load/store instructions. 
These are cracked on the G5, but are still a codesize win. diff --git a/lib/Target/README.txt b/lib/Target/README.txt index a6e05fa..69da35f 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -282,19 +282,6 @@ this requires TBAA. //===---------------------------------------------------------------------===// -This should be optimized to one 'and' and one 'or', from PR4216: - -define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp { -entry: - %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; <i32> [#uses=1] - %0 = and i32 %bf.prev.low, -65536 ; <i32> [#uses=1] - %1 = and i32 %bf.prev.lo.cleared10, 40186 ; <i32> [#uses=1] - %2 = or i32 %1, %0 ; <i32> [#uses=1] - ret i32 %2 -} - -//===---------------------------------------------------------------------===// - This isn't recognized as bswap by instcombine (yes, it really is bswap): unsigned long reverse(unsigned v) { @@ -1661,38 +1648,9 @@ would delete the or instruction for us. //===---------------------------------------------------------------------===// -FunctionAttrs is not marking this function as readnone (just readonly): -$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs - -int t(int a, int b, int c) { - int *p; - if (a) - p = &a; - else - p = &c; - return *p; -} - -This is because we codegen this to: - -define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp { -entry: - %a.addr = alloca i32 ; <i32*> [#uses=3] - %c.addr = alloca i32 ; <i32*> [#uses=2] -... - -if.end: - %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] - %tmp2 = load i32* %p.0 ; <i32> [#uses=1] - ret i32 %tmp2 -} - -And functionattrs doesn't realize that the p.0 load points to function local -memory. - -Also, functionattrs doesn't know about memcpy/memset. This function should be -marked readnone, since it only twiddles local memory, but functionattrs doesn't -handle memset/memcpy/memmove aggressively: +functionattrs doesn't know much about memcpy/memset. This function should be +marked readnone rather than readonly, since it only twiddles local memory, but +functionattrs doesn't handle memset/memcpy/memmove aggressively: struct X { int *p; int *q; }; int foo() { diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index b41917e..e1b3299 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -43,11 +43,11 @@ public: TM(tm) { } - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); // Complex Pattern Selectors. 
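// (A ComplexPattern selector splits one address SDValue into the separate
// operands an instruction pattern expects, here either reg+reg or reg+imm;
// illustrative summary.)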
- bool SelectADDRrr(SDValue Op, SDValue N, SDValue &R1, SDValue &R2); - bool SelectADDRri(SDValue Op, SDValue N, SDValue &Base, + bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for @@ -87,7 +87,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr, +bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); @@ -128,7 +128,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr, return true; } -bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr, +bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr, SDValue &R1, SDValue &R2) { if (Addr.getOpcode() == ISD::FrameIndex) return false; if (Addr.getOpcode() == ISD::TargetExternalSymbol || @@ -152,8 +152,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr, return true; } -SDNode *SparcDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *SparcDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); if (N->isMachineOpcode()) return NULL; // Already selected. @@ -199,7 +198,7 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) { } } - return SelectCode(Op); + return SelectCode(N); } @@ -213,8 +212,8 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, switch (ConstraintCode) { default: return true; case 'm': // memory - if (!SelectADDRrr(Op, Op, Op0, Op1)) - SelectADDRri(Op, Op, Op0, Op1); + if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1)) + SelectADDRri(Op.getNode(), Op, Op0, Op1); break; } diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 590574e..7cc4fd1 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/SubtargetFeature.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" #include <algorithm> @@ -355,7 +356,7 @@ void SubtargetFeatures::print(raw_ostream &OS) const { /// dump - Dump feature info. 
/// void SubtargetFeatures::dump() const { - print(errs()); + print(dbgs()); } /// getDefaultSubtargetFeatures - Return a string listing diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index d64611d..7096c0e 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -128,23 +128,23 @@ namespace { #include "SystemZGenDAGISel.inc" private: - bool SelectAddrRI12Only(SDValue Op, SDValue& Addr, + bool SelectAddrRI12Only(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp); - bool SelectAddrRI12(SDValue Op, SDValue& Addr, + bool SelectAddrRI12(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp, bool is12BitOnly = false); - bool SelectAddrRI(SDValue Op, SDValue& Addr, + bool SelectAddrRI(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp); - bool SelectAddrRRI12(SDValue Op, SDValue Addr, + bool SelectAddrRRI12(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectAddrRRI20(SDValue Op, SDValue Addr, + bool SelectAddrRRI20(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectLAAddr(SDValue Op, SDValue Addr, + bool SelectLAAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *Node); - bool TryFoldLoad(SDValue P, SDValue N, + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index); bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM, @@ -367,12 +367,12 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM, /// Returns true if the address can be represented by a base register plus /// an unsigned 12-bit displacement [r+imm]. -bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp) { return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true); } -bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp, bool is12BitOnly) { SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true); @@ -422,7 +422,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr, /// Returns true if the address can be represented by a base register plus /// a signed 20-bit displacement [r+imm]. -bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr, SDValue &Base, SDValue &Disp) { SystemZRRIAddressMode AM(/*isRI*/true); bool Done = false; @@ -465,7 +465,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr, /// Returns true if the address can be represented by a base register plus /// index register plus an unsigned 12-bit displacement [base + idx + imm]. -bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM20, AM12; bool Done = false; @@ -514,7 +514,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr, /// Returns true if the address can be represented by a base register plus /// index register plus a signed 20-bit displacement [base + idx + imm]. 
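/// (Illustrative: the long-displacement RXY forms accept -524288..524287,
/// e.g. "lay %r1, -4096(%r2,%r3)", while the 12-bit forms above only take
/// 0..4095.)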
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM; bool Done = false; @@ -558,7 +558,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr, /// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LA/LAY instruction. -bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM; @@ -591,11 +591,11 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr, return false; } -bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, +bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) + IsLegalAndProfitableToFold(N.getNode(), P, P)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } @@ -612,10 +612,9 @@ void SystemZDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } -SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { - SDNode *Node = Op.getNode(); +SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc dl = Node->getDebugLoc(); unsigned Opcode = Node->getOpcode(); // Dump information about the Node being selected @@ -643,20 +642,20 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { EVT ResVT; bool is32Bit = false; switch (NVT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported VT!"); - case MVT::i32: - Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m; - ResVT = MVT::v2i64; - is32Bit = true; - break; - case MVT::i64: - Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m; - ResVT = MVT::v2i64; - break; + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m; + ResVT = MVT::v2i64; + is32Bit = true; + break; + case MVT::i64: + Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m; + ResVT = MVT::v2i64; + break; } SDValue Tmp0, Tmp1, Tmp2; - bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2); // Prepare the dividend SDNode *Dividend; @@ -677,16 +676,16 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { SDValue DivVal = SDValue(Dividend, 0); if (foldedLoad) { SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; - Result = CurDAG->getMachineNode(MOpc, dl, ResVT, + Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other, Ops, array_lengthof(Ops)); // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + ReplaceUses(N1.getValue(1), SDValue(Result, 1)); } else { Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1); } // Copy the division (odd subreg) result, if it is needed. - if (!Op.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? 
subreg_odd32 : subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, @@ -694,14 +693,14 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. - if (!Op.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, @@ -709,7 +708,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); @@ -729,22 +728,22 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { bool is32Bit = false; switch (NVT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported VT!"); - case MVT::i32: - Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m; - ClrOpc = SystemZ::MOV64Pr0_even; - ResVT = MVT::v2i32; - is32Bit = true; - break; - case MVT::i64: - Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m; - ClrOpc = SystemZ::MOV128r0_even; - ResVT = MVT::v2i64; - break; + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m; + ClrOpc = SystemZ::MOV64Pr0_even; + ResVT = MVT::v2i32; + is32Bit = true; + break; + case MVT::i64: + Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m; + ClrOpc = SystemZ::MOV128r0_even; + ResVT = MVT::v2i64; + break; } SDValue Tmp0, Tmp1, Tmp2; - bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2); // Prepare the dividend SDNode *Dividend = N0.getNode(); @@ -767,37 +766,37 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { SDNode *Result; if (foldedLoad) { SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; - Result = CurDAG->getMachineNode(MOpc, dl,ResVT, + Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other, Ops, array_lengthof(Ops)); // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + ReplaceUses(N1.getValue(1), SDValue(Result, 1)); } else { Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1); } // Copy the division (odd subreg) result, if it is needed. - if (!Op.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. - if (!Op.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? 
subreg_even32 : subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); DEBUG(errs().indent(Indent-2) << "=> "; Result->dump(CurDAG); errs() << "\n"); @@ -812,11 +811,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { } // Select the default instruction - SDNode *ResNode = SelectCode(Op); + SDNode *ResNode = SelectCode(Node); DEBUG(errs().indent(Indent-2) << "=> "; - if (ResNode == NULL || ResNode == Op.getNode()) - Op.getNode()->dump(CurDAG); + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); else ResNode->dump(CurDAG); errs() << "\n"; diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index cddf49e..f5c969a 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -34,7 +34,7 @@ char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) { } LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) { - return unwrap(TD)->isLittleEndian(); + return unwrap(TD)->isLittleEndian() ? LLVMLittleEndian : LLVMBigEndian; } unsigned LLVMPointerSize(LLVMTargetDataRef TD) { diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index f887523..70e8008 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -21,11 +21,13 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; @@ -492,16 +494,15 @@ getELFKindForNamedSection(const char *Name, SectionKind K) { } -static unsigned -getELFSectionType(const char *Name, SectionKind K) { +static unsigned getELFSectionType(StringRef Name, SectionKind K) { - if (strcmp(Name, ".init_array") == 0) + if (Name == ".init_array") return MCSectionELF::SHT_INIT_ARRAY; - if (strcmp(Name, ".fini_array") == 0) + if (Name == ".fini_array") return MCSectionELF::SHT_FINI_ARRAY; - if (strcmp(Name, ".preinit_array") == 0) + if (Name == ".preinit_array") return MCSectionELF::SHT_PREINIT_ARRAY; if (K.isBSS() || K.isThreadBSS()) @@ -577,10 +578,16 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // into a 'uniqued' section name, create and return the section now. if (GV->isWeakForLinker()) { const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); - std::string Name = Mang->makeNameProper(GV->getNameStr()); - - return getELFSection((Prefix+Name).c_str(), - getELFSectionType((Prefix+Name).c_str(), Kind), + SmallString<128> Name, MangledName; + Name.append(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + + raw_svector_ostream OS(MangledName); + MCSymbol::printMangledName(Name, OS, 0); + OS.flush(); + + return getELFSection(MangledName.str(), + getELFSectionType(MangledName.str(), Kind), getELFSectionFlags(Kind), Kind); } @@ -922,7 +929,7 @@ const MCSection * TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. 
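// Illustrative case: a read-only global whose initializer holds the address
// of another global still needs a relocation, so it is isReadOnlyWithRel and,
// per the change below, must also go to ConstDataSection (__DATA,__const)
// rather than a __TEXT constant section.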
- if (Kind.isDataRel()) + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) return ConstDataSection; if (Kind.isMergeableConst4()) @@ -983,7 +990,7 @@ TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(const char *Name, bool isDirective, SectionKind Kind) const { +getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { // Create the map if it doesn't already exist. if (UniquingMap == 0) UniquingMap = new MachOUniqueMapTy(); @@ -1078,8 +1085,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // into a 'uniqued' section name, create and return the section now. if (GV->isWeakForLinker()) { const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - std::string Name = Mang->makeNameProper(GV->getNameStr()); - return getCOFFSection((Prefix+Name).c_str(), false, Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getCOFFSection(Name.str(), false, Kind); } if (Kind.isText()) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c357b4d..c4ae5d2 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" @@ -15,6 +16,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmParser.h" @@ -46,7 +48,7 @@ private: /// @name Auto-generated Match Functions /// { - bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands, + bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); /// MatchRegisterName - Match the given string to a register name, or 0 if @@ -59,7 +61,8 @@ public: X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; @@ -71,7 +74,7 @@ namespace { /// X86Operand - Instances of this class represent a parsed X86 machine /// instruction. -struct X86Operand { +struct X86Operand : public MCParsedAsmOperand { enum { Token, Register, @@ -400,10 +403,11 @@ bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) { return false; } -bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { - SmallVector<X86Operand, 8> Operands; +bool X86ATTAsmParser:: +ParseInstruction(const StringRef &Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - Operands.push_back(X86Operand::CreateToken(Name)); + Operands.push_back(new X86Operand(X86Operand::CreateToken(Name))); SMLoc Loc = getLexer().getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -411,31 +415,27 @@ bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) { getLexer().Lex(); // Eat the star. 
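// (In AT&T syntax a leading '*' marks an indirect call/jump target, e.g.
// "call *%eax", so the star is kept as an explicit operand token;
// illustrative note.)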
- Operands.push_back(X86Operand::CreateToken("*")); + Operands.push_back(new X86Operand(X86Operand::CreateToken("*"))); } // Read the first operand. - Operands.push_back(X86Operand()); - if (ParseOperand(Operands.back())) + X86Operand Op; + if (ParseOperand(Op)) return true; + Operands.push_back(new X86Operand(Op)); + while (getLexer().is(AsmToken::Comma)) { getLexer().Lex(); // Eat the comma. // Parse and remember the operand. - Operands.push_back(X86Operand()); - if (ParseOperand(Operands.back())) + if (ParseOperand(Op)) return true; + Operands.push_back(new X86Operand(Op)); } } - if (!MatchInstruction(Operands, Inst)) - return false; - - // FIXME: We should give nicer diagnostics about the exact failure. - - Error(Loc, "unrecognized instruction"); - return true; + return false; } bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index b88063f..70c6dd0 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -201,6 +201,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { + SmallString<128> TempNameStr; switch (MO.getType()) { default: llvm_unreachable("unknown symbol type!"); case MachineOperand::MO_JumpTableIndex: @@ -236,41 +237,38 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$non_lazy_ptr"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$non_lazy_ptr"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - SmallString<128> NameStr; - Mang->getNameWithPrefix(NameStr, GV, true); - NameStr += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + Mang->getNameWithPrefix(TempNameStr, GV, true); + TempNameStr += "$stub"; + MCSymbol *Sym = 
OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { - NameStr.clear(); - Mang->getNameWithPrefix(NameStr, GV, false); - StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + TempNameStr.clear(); + Mang->getNameWithPrefix(TempNameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } } @@ -285,24 +283,32 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { break; } case MachineOperand::MO_ExternalSymbol: { - std::string Name = Mang->makeNameProper(MO.getSymbolName()); + const MCSymbol *SymToPrint; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - Name += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name)); + Mang->getNameWithPrefix(TempNameStr, + StringRef(MO.getSymbolName())+"$stub"); + const MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str()); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { - Name.erase(Name.end()-5, Name.end()); - StubSym = OutContext.GetOrCreateSymbol(StringRef(Name)); + TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end()); + StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str()); } + SymToPrint = StubSym; + } else { + Mang->getNameWithPrefix(TempNameStr, MO.getSymbolName()); + SymToPrint = OutContext.GetOrCreateSymbol(TempNameStr.str()); } // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. - if (Name[0] == '$') - O << '(' << Name << ')'; - else - O << Name; + if (SymToPrint->getName()[0] != '$') + SymToPrint->print(O, MAI); + else { + O << '('; + SymToPrint->print(O, MAI); + O << ')'; + } break; } } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 1015b69..9ee118c 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/DebugInfo.h" using namespace llvm; @@ -399,6 +400,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(X86::MOVZX32rm16); lower_subreg32(&OutMI, 0); break; + case X86::MOV16r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; + case X86::MOV64r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; } } @@ -412,6 +421,25 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case TargetInstrInfo::GC_LABEL: printLabel(MI); return; + case TargetInstrInfo::DEBUG_VALUE: { + if (!VerboseAsm) + return; + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; the DI* classes do not take const operands for some reason + DIVariable V((MDNode*)(MI->getOperand(2).getMetadata())); + O << V.getName(); + O << " <- "; + if (MI->getOperand(0).getType()==MachineOperand::MO_Register) + printOperand(MI, 0); + else { + assert(MI->getOperand(0).getType()==MachineOperand::MO_Immediate); + int64_t imm = MI->getOperand(0).getImm(); + O << '[' << ((imm<0) ?
"EBP" : "ESP+") << imm << ']'; + } + O << "+"; + printOperand(MI, 1); + return; + } case TargetInstrInfo::INLINEASM: printInlineAsm(MI); return; diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 71ad51c..0f3e44b 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -916,3 +916,23 @@ cheaper to do fld1 than load from a constant pool for example, so "load, add 1.0, store" is better done in the fp stack, etc. //===---------------------------------------------------------------------===// + +The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to +"cmpsd". For example, this code: + +double d1(double x) { return x == x ? x : x + x; } + +Compiles into: + +_d1: + ucomisd %xmm0, %xmm0 + jnp LBB1_2 + addsd %xmm0, %xmm0 + ret +LBB1_2: + ret + +Also, the 'ret's should be shared. This is PR6032. + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index afd9f53..aa7bb3d 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -530,7 +530,7 @@ We should inline lrintf and probably other libc functions. //===---------------------------------------------------------------------===// -Start using the flags more. For example, compile: +Use the FLAGS values from arithmetic instructions more. For example, compile: int add_zf(int *x, int y, int a, int b) { if ((*x += y) == 0) @@ -554,31 +554,8 @@ _add_zf: movl %ecx, %eax ret -and: - -int add_zf(int *x, int y, int a, int b) { - if ((*x + y) < 0) - return a; - else - return b; -} - -to: - -add_zf: - addl (%rdi), %esi - movl %edx, %eax - cmovns %ecx, %eax - ret - -instead of: - -_add_zf: - addl (%rdi), %esi - testl %esi, %esi - cmovs %edx, %ecx - movl %ecx, %eax - ret +As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll +without a test instruction. //===---------------------------------------------------------------------===// @@ -685,55 +662,6 @@ Though this probably isn't worth it. //===---------------------------------------------------------------------===// -We need to teach the codegen to convert two-address INC instructions to LEA -when the flags are dead (likewise dec). For example, on X86-64, compile: - -int foo(int A, int B) { - return A+1; -} - -to: - -_foo: - leal 1(%edi), %eax - ret - -instead of: - -_foo: - incl %edi - movl %edi, %eax - ret - -Another example is: - -;; X's live range extends beyond the shift, so the register allocator -;; cannot coalesce it with Y. Because of this, a copy needs to be -;; emitted before the shift to save the register value before it is -;; clobbered. However, this copy is not needed if the register -;; allocator turns the shift into an LEA. This also occurs for ADD. - -; Check that the shift gets turned into an LEA. -; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \ -; RUN: not grep {mov E.X, E.X} - -@G = external global i32 ; <i32*> [#uses=3] - -define i32 @test1(i32 %X, i32 %Y) { - %Z = add i32 %X, %Y ; <i32> [#uses=1] - volatile store i32 %Y, i32* @G - volatile store i32 %Z, i32* @G - ret i32 %X -} - -define i32 @test2(i32 %X) { - %Z = add i32 %X, 1 ; <i32> [#uses=1] - volatile store i32 %Z, i32* @G - ret i32 %X -} - -//===---------------------------------------------------------------------===// - Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with a neg instead of a sub instruction. 
Consider: @@ -854,11 +782,6 @@ __Z11no_overflowjj: //===---------------------------------------------------------------------===// -Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the -condition register is dead. xor reg reg is shorter than mov reg, #0. - -//===---------------------------------------------------------------------===// - The following code: bb114.preheader: ; preds = %cond_next94 diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a6e1ca3..7919559 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -23,6 +23,7 @@ include "llvm/Target/Target.td" def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", @@ -66,6 +67,9 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add">; +def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", + "HasVectorUAMem", "true", + "Allow unaligned memory operands on vector/SIMD instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 4892e17..828e872 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -135,7 +135,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DEBUG(errs() << "JITTing function '" + DEBUG(dbgs() << "JITTing function '" << MF.getFunction()->getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); @@ -477,7 +477,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, template<class CodeEmitter> void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, const TargetInstrDesc *Desc) { - DEBUG(errs() << MI); + DEBUG(dbgs() << MI); MCE.processDebugLoc(MI.getDebugLoc(), true); @@ -618,11 +618,11 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(CurOp++); - DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n"); - DEBUG(errs() << "isMBB " << MO.isMBB() << "\n"); - DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n"); - DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n"); - DEBUG(errs() << "isImm " << MO.isImm() << "\n"); + DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); + DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n"); + DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n"); + DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n"); + DEBUG(dbgs() << "isImm " << MO.isImm() << "\n"); if (MO.isMBB()) { emitPCRelativeBlockAddress(MO.getMBB()); @@ -843,7 +843,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (!Desc->isVariadic() && CurOp != NumOps) { #ifndef NDEBUG - errs() << "Cannot encode all operands of: " << MI << "\n"; + dbgs() << "Cannot encode all operands of: " << MI << "\n"; #endif llvm_unreachable(0); } @@ -1082,9 +1082,9 @@ public: } if (!OK) { - errs() << "couldn't convert inst '"; + dbgs() << "couldn't convert inst '"; MI.dump(); - errs() << "' to machine instr:\n"; + dbgs() << "' to machine instr:\n"; Instr->dump(); } diff --git a/lib/Target/X86/X86FastISel.cpp 
b/lib/Target/X86/X86FastISel.cpp index 431c120..7e02d59 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType() == Type::getInt8Ty(I->getContext()) && - I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) { + if (I->getType()->isInteger(8) && + I->getOperand(0)->getType()->isInteger(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. @@ -948,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType() == Type::getInt8Ty(I->getContext())) { + if (I->getType()->isInteger(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType() == Type::getInt16Ty(I->getContext())) { + } else if (I->getType()->isInteger(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType() == Type::getInt32Ty(I->getContext())) { + } else if (I->getType()->isInteger(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType() == Type::getInt64Ty(I->getContext())) { + } else if (I->getType()->isInteger(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1230,8 +1230,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) { CC != CallingConv::X86_FastCall) return false; - // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't - // handle this for now. + // fastcc with -tailcallopt is intended to provide a guaranteed + // tail call optimization. Fastisel doesn't know how to do that. if (CC == CallingConv::Fast && PerformTailCallOpt) return false; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 044bd4b..503ac14 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -75,12 +75,12 @@ namespace { unsigned StackTop; // The current top of the FP stack. void dumpStack() const { - errs() << "Stack contents:"; + dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { - errs() << " FP" << Stack[i]; + dbgs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } - errs() << "\n"; + dbgs() << "\n"; } private: /// isStackEmpty - Return true if the FP stack is empty. @@ -246,7 +246,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { PrevMI = prior(I); ++NumFP; // Keep track of # of pseudo instrs - DEBUG(errs() << "\nFPInst:\t" << *MI); + DEBUG(dbgs() << "\nFPInst:\t" << *MI); // Get dead variables list now because the MI pointer may be deleted as part // of processing! 
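// (Background, illustrative: the FP stackifier maps the virtual registers
// FP0-FP6 onto the x87 register stack ST(0)-ST(7); freeing dead stack slots
// promptly, as in the code below, keeps the stack from overflowing its
// eight slots.)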
@@ -273,7 +273,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { unsigned Reg = DeadRegs[i]; if (Reg >= X86::FP0 && Reg <= X86::FP6) { - DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); + DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); freeStackSlotAfter(I, Reg-X86::FP0); } } @@ -282,13 +282,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { DEBUG( MachineBasicBlock::iterator PrevI(PrevMI); if (I == PrevI) { - errs() << "Just deleted pseudo instruction\n"; + dbgs() << "Just deleted pseudo instruction\n"; } else { MachineBasicBlock::iterator Start = I; // Rewind to first instruction newly inserted. while (Start != BB.begin() && prior(Start) != PrevI) --Start; - errs() << "Inserted instructions:\n\t"; - Start->print(errs(), &MF.getTarget()); + dbgs() << "Inserted instructions:\n\t"; + Start->print(dbgs(), &MF.getTarget()); while (++Start != llvm::next(I)) {} } dumpStack(); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index cb82383..e2a53d1 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -113,37 +113,37 @@ namespace { } void dump() { - errs() << "X86ISelAddressMode " << this << '\n'; - errs() << "Base.Reg "; + dbgs() << "X86ISelAddressMode " << this << '\n'; + dbgs() << "Base.Reg "; if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); else - errs() << "nul"; - errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + dbgs() << "nul"; + dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - errs() << "nul"; - errs() << " Disp " << Disp << '\n' + dbgs() << "nul"; + dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) GV->dump(); else - errs() << "nul"; - errs() << " CP "; + dbgs() << "nul"; + dbgs() << " CP "; if (CP) CP->dump(); else - errs() << "nul"; - errs() << '\n' + dbgs() << "nul"; + dbgs() << '\n' << "ES "; if (ES) - errs() << ES; + dbgs() << ES; else - errs() << "nul"; - errs() << " JT" << JT << " Align" << Align << '\n'; + dbgs() << "nul"; + dbgs() << " JT" << JT << " Align" << Align << '\n'; } }; } @@ -190,7 +190,7 @@ namespace { #include "X86GenDAGISel.inc" private: - SDNode *Select(SDValue N); + SDNode *Select(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); @@ -201,19 +201,19 @@ namespace { bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); - bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, + bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDValue Op, SDValue Pred, + bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &InChain, SDValue &OutChain); - bool TryFoldLoad(SDValue P, SDValue N, + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue 
&Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); @@ -310,6 +310,11 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, if (U == Root) switch (U->getOpcode()) { default: break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::AND: + case X86ISD::XOR: + case X86ISD::OR: case ISD::ADD: case ISD::ADDC: case ISD::ADDE: @@ -675,12 +680,12 @@ void X86DAGToDAGISel::InstructionSelect() { // Codegen the basic block. #ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(dbgs() << "===== Instruction selection begins:\n"); Indent = 0; #endif SelectRoot(*CurDAG); #ifndef NDEBUG - DEBUG(errs() << "===== Instruction selection ends:\n"); + DEBUG(dbgs() << "===== Instruction selection ends:\n"); #endif CurDAG->RemoveDeadNodes(); @@ -850,7 +855,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); DEBUG({ - errs() << "MatchAddress: "; + dbgs() << "MatchAddress: "; AM.dump(); }); // Limit recursion. @@ -1268,7 +1273,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. -bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; @@ -1291,7 +1296,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, +bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, @@ -1302,7 +1307,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, if (ISD::isNON_EXTLoad(InChain.getNode()) && InChain.getValue(0).hasOneUse() && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) { + IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { LoadSDNode *LD = cast<LoadSDNode>(InChain); if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; @@ -1333,7 +1338,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. -bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, +bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { X86ISelAddressMode AM; @@ -1395,10 +1400,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, } /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 
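The Select and Select*Addr signature changes here (SDValue to SDNode*) recur across every backend in this patch; the selector hooks only ever needed the node identity, so passing SDNode* drops the repeated .getNode() calls and makes result values explicit as SDValue(Node, i). A sketch of the updated hook shape, under a hypothetical target class:

// Hypothetical target; mirrors the hook signatures adopted in this diff.
SDNode *MyDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode())
    return NULL; // already selected
  // Result values are now spelled SDValue(N, i) instead of N.getValue(i).
  if (!SDValue(N, 0).use_empty()) {
    // ... custom selection for the first result would go here ...
  }
  return SelectCode(N); // fall back to the generated matcher
}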
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op.getOpcode() == X86ISD::TLSADDR); + assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); @@ -1421,13 +1426,13 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, } -bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, +bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { if (ISD::isNON_EXTLoad(N.getNode()) && N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) + IsLegalAndProfitableToFold(N.getNode(), P, P)) return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); return false; } @@ -1454,7 +1459,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); @@ -1480,7 +1485,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return 0; bool isInc = false, isDec = false, isSub = false, isCN = false; @@ -1678,8 +1683,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } -SDNode *X86DAGToDAGISel::Select(SDValue N) { - SDNode *Node = N.getNode(); +SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); @@ -1687,9 +1691,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent, ' ') << "Selecting: "; + dbgs() << std::string(Indent, ' ') << "Selecting: "; Node->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent += 2; #endif @@ -1697,9 +1701,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { if (Node->isMachineOpcode()) { #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "== "; + dbgs() << std::string(Indent-2, ' ') << "== "; Node->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent -= 2; #endif @@ -1767,10 +1771,10 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); // Multiply is commmutative. if (!foldedLoad) { - foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); if (foldedLoad) std::swap(N0, N1); } @@ -1793,21 +1797,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } // Copy the low half of the result, if it is needed. 
- if (!N.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + ReplaceUses(SDValue(Node, 0), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } // Copy the high half of the result, if it is needed. - if (!N.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { SDValue Result; if (HiReg == X86::AH && Subtarget->is64Bit()) { // Prevent use of AH in a REX instruction by referencing AX instead. @@ -1826,12 +1830,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { HiReg, NVT, InFlag); InFlag = Result.getValue(2); } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(SDValue(Node, 1), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } @@ -1869,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { unsigned LoReg, HiReg, ClrReg; unsigned ClrOpcode, SExtOpcode; - EVT ClrVT = NVT; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: @@ -1879,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32; + ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: @@ -1889,13 +1892,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = ~0U; // NOT USED. + ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); bool signBitIsZero = CurDAG->SignBitIsZero(N0); SDValue InFlag; @@ -1903,7 +1906,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; - if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, @@ -1928,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode; - - if (NVT.getSimpleVT() == MVT::i64) { - ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), - 0); - // We just did a 32-bit clear, insert it into a 64-bit register to - // clear the whole 64-bit reg. 
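The deleted sequence here (it continues just below) built a 64-bit zero out of MOV32r0 plus SUBREG_TO_REG; with MOV16r0 and MOV64r0 defined later in this patch, every width now has a direct rematerializable zero for the unsigned-divide path. For context: DIV/IDIV take the dividend in a register pair (AH:AL, DX:AX, EDX:EAX, RDX:RAX), so signed division sign-extends the low half into the high half while unsigned division must clear it. A toy summary of the setup per width (illustrative strings, not the real opcode enums):

// Which instruction seeds the high half of the dividend (sketch).
const char *divHighHalfSetup(unsigned Bits, bool IsSigned) {
  if (IsSigned) // sign-extend low half into high half
    return Bits == 16 ? "CWD" : Bits == 32 ? "CDQ" : "CQO";
  // Unsigned: clear the high register with a rematerializable zero.
  return Bits == 16 ? "MOV16r0" : Bits == 32 ? "MOV32r0" : "MOV64r0";
}
// The i8 case is special-cased above: a MOVZX into AX clears AH directly.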
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); - SDValue SubRegNo = - CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); - ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, - MVT::i64, Zero, ClrNode, SubRegNo), - 0); - } else { - ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0); - } - + SDValue ClrNode = + SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } @@ -1966,21 +1953,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } // Copy the division (low) result, if it is needed. - if (!N.getValue(0).use_empty()) { + if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + ReplaceUses(SDValue(Node, 0), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } // Copy the remainder (high) result, if it is needed. - if (!N.getValue(1).use_empty()) { + if (!SDValue(Node, 1).use_empty()) { SDValue Result; if (HiReg == X86::AH && Subtarget->is64Bit()) { // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2000,12 +1987,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { HiReg, NVT, InFlag); InFlag = Result.getValue(2); } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(SDValue(Node, 1), Result); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; + dbgs() << std::string(Indent-2, ' ') << "=> "; Result.getNode()->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); #endif } @@ -2124,16 +2111,16 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { } } - SDNode *ResNode = SelectCode(N); + SDNode *ResNode = SelectCode(Node); #ifndef NDEBUG DEBUG({ - errs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == N.getNode()) - N.getNode()->dump(CurDAG); + dbgs() << std::string(Indent-2, ' ') << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); else ResNode->dump(CurDAG); - errs() << '\n'; + dbgs() << '\n'; }); Indent -= 2; #endif @@ -2150,7 +2137,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, case 'v': // not offsetable ?? 
default: return true; case 'm': // memory - if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4)) + if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4)) return true; break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c722fbf..228ec9f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -978,6 +978,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MEMBARRIER); setTargetDAGCombine(ISD::ZERO_EXTEND); @@ -2077,10 +2078,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, assert(((Callee.getOpcode() == ISD::Register && (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX || - cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) || + cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress) && - "Expecting an global address, external symbol, or register"); + "Expecting a global address, external symbol, or scratch register"); return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); @@ -5610,13 +5611,21 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // because a TEST instruction will be better. bool NonFlagUse = false; for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::BRCOND && - UI->getOpcode() != ISD::SELECT && - UI->getOpcode() != ISD::SETCC) { + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look past the truncate. + UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + if (User->getOpcode() != ISD::BRCOND && + User->getOpcode() != ISD::SETCC && + (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { NonFlagUse = true; break; } + } if (!NonFlagUse) break; } @@ -5680,6 +5689,56 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } +/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node +/// if possible. +static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) { + SDValue LHS, RHS; + if (Op0.getOperand(1).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op010C = + dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) + if (Op010C->getZExtValue() == 1) { + LHS = Op0.getOperand(0); + RHS = Op0.getOperand(1).getOperand(1); + } + } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op000C = + dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) + if (Op000C->getZExtValue() == 1) { + LHS = Op0.getOperand(1); + RHS = Op0.getOperand(0).getOperand(1); + } + } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); + SDValue AndLHS = Op0.getOperand(0); + if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { + LHS = AndLHS.getOperand(0); + RHS = AndLHS.getOperand(1); + } + } + + if (LHS.getNode()) { + // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // instruction.
Since the shift amount is in-range-or-undefined, we know + // that doing a bittest on the i16 value is ok. We extend to i32 because + // the encoding for the i16 version is larger than the i32 version. + if (LHS.getValueType() == MVT::i8) + LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); + + // If the operand types disagree, extend the shift amount to match. Since + // BT ignores high bits (like shifts) we can use anyextend. + if (LHS.getValueType() != RHS.getValueType()) + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); + + SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); + unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(Cond, MVT::i8), BT); + } + + return SDValue(); +} + SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); @@ -5687,6 +5746,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // Optimize to BT if possible. // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). @@ -5695,48 +5755,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(Op1)->getZExtValue() == 0 && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); - } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); - } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); - if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { - LHS = AndLHS.getOperand(0); - RHS = AndLHS.getOperand(1); - } - } - - if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT - // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. We extend to i32 because - // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) - LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); - - // If the operand types disagree, extend the shift amount to match. Since - // BT ignores high bits (like shifts) we can use anyextend. - if (LHS.getValueType() != RHS.getValueType()) - RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); - - SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); - unsigned Cond = CC == ISD::SETEQ ? 
X86::COND_AE : X86::COND_B; - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, MVT::i8), BT); - } + SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); + if (NewSetCC.getNode()) + return NewSetCC; } bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); @@ -5936,6 +5957,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { } if (addTest) { + // Look past the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero. Try to match + // it to BT. + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); } @@ -6093,6 +6131,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { } if (addTest) { + // Look past the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero. Try to match + // it to BT. + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); } @@ -7524,8 +7579,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1 == Type::getInt32Ty(Ty1->getContext()) && - Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit(); + return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7749,7 +7803,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, for (int i=0; i < 2 + X86AddrNumOperands; ++i) argOpers[i] = &bInstr->getOperand(i+2); - // x86 address has 4 operands: base, index, scale, and displacement + // x86 address has 5 operands: base, index, scale, displacement, and segment. int lastAddrIndx = X86AddrNumOperands - 1; // [0,4] unsigned t1 = F->getRegInfo().createVirtualRegister(RC); @@ -7777,14 +7831,16 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg()) .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); - unsigned tt1 = F->getRegInfo().createVirtualRegister(RC); - unsigned tt2 = F->getRegInfo().createVirtualRegister(RC); + // The subsequent operations should be using the destination registers of + // the PHI instructions.
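LowerSELECT and LowerBRCOND above now look through a truncate and reuse the hoisted LowerToBT helper. At the source level, the shapes that helper recognizes are ordinary single-bit tests; roughly (illustrative, exact instruction choice may differ by subtarget):

// Shapes LowerToBT matches when compared (in)equal to zero:
bool testBitShl(unsigned long X, unsigned N) {
  return (X & (1UL << N)) != 0; // and(x, shl(1, n)) -> bt + setb/setae
}
bool testBitSrl(unsigned long X, unsigned N) {
  return ((X >> N) & 1) != 0;   // and(srl(x, n), 1) -> same BT form
}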
if (invSrc) { - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2); + t1 = F->getRegInfo().createVirtualRegister(RC); + t2 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg()); } else { - tt1 = t1; - tt2 = t2; + t1 = dest1Oper.getReg(); + t2 = dest2Oper.getReg(); } int valArgIndx = lastAddrIndx + 1; @@ -7798,7 +7854,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, else MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5); if (regOpcL != X86::MOV32rr) - MIB.addReg(tt1); + MIB.addReg(t1); (*MIB).addOperand(*argOpers[valArgIndx]); assert(argOpers[valArgIndx + 1]->isReg() == argOpers[valArgIndx]->isReg()); @@ -7809,7 +7865,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, else MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6); if (regOpcH != X86::MOV32rr) - MIB.addReg(tt2); + MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); @@ -9108,6 +9164,64 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT != MVT::i64 || !Subtarget->is64Bit()) + return SDValue(); + + // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + + SDValue ShAmt0 = N0.getOperand(1); + if (ShAmt0.getValueType() != MVT::i8) + return SDValue(); + SDValue ShAmt1 = N1.getOperand(1); + if (ShAmt1.getValueType() != MVT::i8) + return SDValue(); + if (ShAmt0.getOpcode() == ISD::TRUNCATE) + ShAmt0 = ShAmt0.getOperand(0); + if (ShAmt1.getOpcode() == ISD::TRUNCATE) + ShAmt1 = ShAmt1.getOperand(0); + + DebugLoc DL = N->getDebugLoc(); + unsigned Opc = X86ISD::SHLD; + SDValue Op0 = N0.getOperand(0); + SDValue Op1 = N1.getOperand(0); + if (ShAmt0.getOpcode() == ISD::SUB) { + Opc = X86ISD::SHRD; + std::swap(Op0, Op1); + std::swap(ShAmt0, ShAmt1); + } + + if (ShAmt1.getOpcode() == ISD::SUB) { + SDValue Sum = ShAmt1.getOperand(0); + if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { + if (SumC->getSExtValue() == 64 && + ShAmt1.getOperand(1) == ShAmt0) + return DAG.getNode(Opc, DL, VT, + Op0, Op1, + DAG.getNode(ISD::TRUNCATE, DL, + MVT::i8, ShAmt0)); + } + } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { + ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); + if (ShAmt0C && + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + return DAG.getNode(Opc, DL, VT, + N0.getOperand(0), N1.getOperand(0), + DAG.getNode(ISD::TRUNCATE, DL, + MVT::i8, ShAmt0)); + } + + return SDValue(); +} + /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. 
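PerformOrCombine above replaces the SHLD/SHRD selection patterns removed from X86Instr64bit.td later in this diff, and additionally catches the truncated and SUB-based shift-amount forms. The C-level idiom it targets is a 64-bit double shift; a sketch (C must stay in 1..63, since a shift by 64 is undefined and the combine's (sub 64, c) form assumes a nonzero amount):

// fold (or (x << c), (y >> (64 - c))) -> shld x, y, c
unsigned long funnelLeft(unsigned long X, unsigned long Y, unsigned C) {
  return (X << C) | (Y >> (64 - C)); // roughly: shldq %cl, %rsi, %rdi
}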
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -9370,6 +9484,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); @@ -9423,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { std::string AsmStr = IA->getAsmString(); // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" - std::vector<std::string> AsmPieces; + SmallVector<StringRef, 4> AsmPieces; SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? switch (AsmPieces.size()) { @@ -9445,7 +9560,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType() == Type::getInt16Ty(CI->getContext()) && + if (CI->getType()->isInteger(16) && AsmPieces.size() == 3 && AsmPieces[0] == "rorw" && AsmPieces[1] == "$$8," && @@ -9455,12 +9570,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { } break; case 3: - if (CI->getType() == Type::getInt64Ty(CI->getContext()) && + if (CI->getType()->isInteger(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 - std::vector<std::string> Words; + SmallVector<StringRef, 4> Words; SplitString(AsmPieces[0], Words, " \t"); if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { Words.clear(); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 65fbbda..08e1dd1 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; } // isTwoAddress def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), // Alias Instructions //===----------------------------------------------------------------------===// -// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's -// equivalent due to implicit zero-extending, and it sometimes has a smaller -// encoding. +// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a +// smaller encoding, but doing so at isel time interferes with rematerialization +// in the current register allocator. For now, this is rewritten when the +// instruction is lowered to an MCInst. // FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove // when we have a better way to specify isel priority. 
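ExpandInlineAsm, shown above switching from std::vector<std::string> to SmallVector<StringRef> (avoiding a copy per piece), rewrites a few well-known byte-swap asm idioms into llvm.bswap intrinsics so the optimizer can see through them. A hypothetical source-level example of the i16 idiom; whether it is actually matched depends on the exact asm string and constraints the frontend emits lining up with the checks above:

// User code of the kind the "rorw $$8, ${0:w}" matcher targets.
unsigned short swap16(unsigned short X) {
  asm("rorw $8, %w0" : "+r"(X)); // may be replaced by llvm.bswap.i16
  return X;
}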
-let AddedComplexity = 1 in -def : Pat<(i64 0), - (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>; - - -// Materialize i64 constant where top 32-bits are zero. +let Defs = [EFLAGS], + AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), + "", + [(set GR64:$dst, 0)]>; + +// Materialize i64 constant where top 32-bits are zero. This could theoretically +// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however +// that would make it more difficult to rematerialize. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)]>; @@ -1683,6 +1687,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", []>, TB; @@ -1962,6 +1967,17 @@ def : Pat<(add GR64:$src1, 0x0000000080000000), def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), (SUB64mi32 addr:$dst, 0xffffffff80000000)>; +// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it +// has an immediate with at least 32 bits of leading zeros, to avoid needing to +// materialize that immediate in a register first. +def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), + imm:$imm), + x86_subreg_32bit)>; + // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; @@ -2028,7 +2044,7 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; -def : Pat<(srl_su GR16:$src, (i8 8)), +def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), @@ -2098,24 +2114,7 @@ def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c) -def : Pat<(or (srl GR64:$src1, CL:$amt), - (shl GR64:$src2, (sub 64, CL:$amt))), - (SHRD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt), - (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), - (SHRD64mrCL addr:$dst, GR64:$src2)>; - -def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))), - (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - (SHRD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), - (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - addr:$dst), - (SHRD64mrCL addr:$dst, GR64:$src2)>; - +// Double shift patterns def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; @@ -2123,24 +2122,6 @@ def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) -def : Pat<(or (shl GR64:$src1, CL:$amt), - (srl GR64:$src2, (sub 64, CL:$amt))), - (SHLD64rrCL GR64:$src1, GR64:$src2)>; - 
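The new (and GR64:$src, i64immZExt32:$imm) pattern above exploits the implicit zero-extension of 32-bit operations: when the mask's top 32 bits are zero, an andl on the low subregister yields the same 64-bit value and avoids materializing the immediate with a separate movabsq. For example (illustrative):

unsigned long maskLow28(unsigned long X) {
  return X & 0x0FFFFFFFUL; // selected as: andl $268435455, <low 32 bits>
}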
-def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt), - (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst), - (SHLD64mrCL addr:$dst, GR64:$src2)>; - -def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))), - (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - (SHLD64rrCL GR64:$src1, GR64:$src2)>; - -def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))), - (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))), - addr:$dst), - (SHLD64mrCL addr:$dst, GR64:$src2)>; - def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; @@ -2148,6 +2129,19 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. +let AddedComplexity = 5 in { // Try this before the selecting to OR +def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (ADD64rr GR64:$src1, GR64:$src2)>; +} // AddedComplexity + // X86 specific add which produces a flag. def : Pat<(addc GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e555cd1..7b39fb3 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" @@ -711,6 +712,62 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, } } +bool +X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { + switch (MI.getOpcode()) { + default: break; + case X86::MOVSX16rr8: + case X86::MOVZX16rr8: + case X86::MOVSX32rr8: + case X86::MOVZX32rr8: + case X86::MOVSX64rr8: + case X86::MOVZX64rr8: + if (!TM.getSubtarget<X86Subtarget>().is64Bit()) + // It's not always legal to reference the low 8-bit of the larger + // register in 32-bit mode. + return false; + case X86::MOVSX32rr16: + case X86::MOVZX32rr16: + case X86::MOVSX64rr16: + case X86::MOVZX64rr16: + case X86::MOVSX64rr32: + case X86::MOVZX64rr32: { + if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) + // Be conservative. + return false; + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + switch (MI.getOpcode()) { + default: + llvm_unreachable(0); + break; + case X86::MOVSX16rr8: + case X86::MOVZX16rr8: + case X86::MOVSX32rr8: + case X86::MOVZX32rr8: + case X86::MOVSX64rr8: + case X86::MOVZX64rr8: + SubIdx = 1; + break; + case X86::MOVSX32rr16: + case X86::MOVZX32rr16: + case X86::MOVSX64rr16: + case X86::MOVZX64rr16: + SubIdx = 3; + break; + case X86::MOVSX64rr32: + case X86::MOVZX64rr32: + SubIdx = 4; + break; + } + return true; + } + } + return false; +} + /// isFrameOperand - Return true and the FrameIndex if the specified /// operand and follow operands form a reference to the stack frame. 
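The isCoalescableExtInstr implementation above (its declaration appears in X86InstrInfo.h below) tells the register coalescer that a MOVSX/MOVZX behaves like a copy whose source value lives on as a subregister of the destination; the SubIdx values 1, 3 and 4 are the x86 8-, 16- and 32-bit subregister indices of this era. A sketch of how a client might consume it (TII and MI assumed from context):

// Sketch: deciding whether an extension can be coalesced away.
bool canCoalesceExt(const TargetInstrInfo *TII, const MachineInstr &MI) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
    return false;
  // The pre-extension value is available as DstReg:SubIdx, so uses of
  // SrcReg can be rewritten to that subregister and the copy removed.
  return true;
}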
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, @@ -1018,12 +1075,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, switch (Opc) { default: break; case X86::MOV8r0: - case X86::MOV32r0: { + case X86::MOV16r0: + case X86::MOV32r0: + case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { default: break; case X86::MOV8r0: Opc = X86::MOV8ri; break; + case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri; break; } Clone = false; } @@ -2290,8 +2351,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - if (MI->getOpcode() == X86::MOV32r0) + if (MI->getOpcode() == X86::MOV64r0) + NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); + else if (MI->getOpcode() == X86::MOV32r0) NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + else if (MI->getOpcode() == X86::MOV16r0) + NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); else if (MI->getOpcode() == X86::MOV8r0) NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) @@ -2354,7 +2419,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // No fusion if (PrintFailedFusing) - errs() << "We failed to fuse operand " << i << " in " << *MI; + dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } @@ -2559,7 +2624,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } else if (OpNum == 0) { // If operand 0 switch (Opc) { case X86::MOV8r0: + case X86::MOV16r0: case X86::MOV32r0: + case X86::MOV64r0: return true; default: break; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index b83441d..0ab85f4 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -448,6 +448,16 @@ public: unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" + /// extension instruction. That is, it's like a copy where it's legal for the + /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns + /// true, then it's expected the pre-extension value is available as a subreg + /// of the result register. This also returns the sub-register index in + /// SubIdx. + virtual bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination /// stack locations as well. 
This uses a heuristic so it isn't diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4d922a5..396cb53 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -160,15 +160,21 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInFlag]>; -def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>; +def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, + [SDNPCommutative]>; def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; -def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>; +def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, + [SDNPCommutative]>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; -def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>; -def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>; -def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>; +def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, + [SDNPCommutative]>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -487,6 +493,21 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ return N->hasOneUse(); }]>; +// Treat an 'or' node as an 'add' if the 'or'ed bits are known to be zero. +def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), [{ + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + else { + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero0, KnownOne0; + CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) == 0; + } +}]>; + // 'shld' and 'shrd' instruction patterns. Note that even though these have // the srl and shl in their patterns, the C++ code must still check for them, // because predicates are tested before children nodes are explored. @@ -3700,18 +3721,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "xor{b}\t$dst, $dst", [(set GR8:$dst, 0)]>; + +// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller +// encoding and avoids a partial-register update sometimes, but doing so +// at isel time interferes with rematerialization in the current register +// allocator. For now, this is rewritten when the instruction is lowered +// to an MCInst. +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "", + [(set GR16:$dst, 0)]>, OpSize; def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "xor{l}\t$dst, $dst", [(set GR32:$dst, 0)]>; } -// Use xorl instead of xorw since we don't care about the high 16 bits, -// it's smaller, and it avoids a partial-register update.
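The interesting part of or_is_add above is its disjointness test: when the right operand is a constant it suffices that the left operand's known-zero bits cover it, and otherwise every bit position must be provably zero in at least one operand, in which case or, xor and add all compute the same result. A standalone model of the general check:

#include <cstdint>
// Model of the ComputeMaskedBits-based test in or_is_add (illustrative).
bool orIsAdd(uint64_t KnownZero0, uint64_t KnownZero1) {
  // A bit can generate a carry in an add only if it may be set in both
  // operands; requiring each bit to be known zero on at least one side
  // rules that out, so the ADD patterns that follow are safe.
  return (~KnownZero0 & ~KnownZero1) == 0;
}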
-let AddedComplexity = 1 in -def : Pat<(i16 0), - (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>; - //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // @@ -3792,7 +3816,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { -def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr), +def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "lock\n\t" "cmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; @@ -3858,6 +3882,7 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), "cmpxchg8b\t$dst", []>, TB; @@ -4466,7 +4491,7 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; -def : Pat<(srl_su GR16:$src, (i8 8)), +def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), @@ -4640,6 +4665,28 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. +let AddedComplexity = 5 in { // Try this before the selecting to OR +def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (ADD32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (ADD32rr GR32:$src1, GR32:$src2)>; +} // AddedComplexity + //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b26e508..94b9b55 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr), // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to -// be naturally aligned on some targets but not on others. -// FIXME: Actually implement support for targets that don't require the -// alignment. This probably wants a subtarget predicate. +// be naturally aligned on some targets but not on others. If the subtarget +// allows unaligned accesses, match any load, though this may require +// setting a feature bit in the processor (on startup, for example). +// Opteron 10h and later implement such a feature. 
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() >= 16; + return Subtarget->hasVectorUAMem() + || cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index c69cc83..f363903 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -348,7 +348,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { #endif #if 0 - DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr + DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr << " ESP=" << (void*)StackPtr << ": Resolving call to function: " << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index d96aafd..9bd96af 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -591,6 +591,15 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned BasePtr; + // DEBUG_VALUE has a special representation, and is only robust enough to + // represent SP(or BP) +- offset addressing modes. We rewrite the + // FrameIndex to be a constant; implicitly positive constants are relative + // to ESP and negative ones to EBP. + if (MI.getOpcode()==TargetInstrInfo::DEBUG_VALUE) { + MI.getOperand(i).ChangeToImmediate(getFrameIndexOffset(MF, FrameIndex)); + return 0; + } + if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 75cdbad..2039be7 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -286,6 +286,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) + , HasVectorUAMem(false) , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? @@ -317,7 +318,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (Is64Bit) HasX86_64 = true; - DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel + DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index ef6dbaf..618dd10 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -78,6 +78,10 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; + /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands. + /// This may require setting a feature bit in the processor. + bool HasVectorUAMem; + /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. 
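With HasVectorUAMem threaded from the FeatureVectorUAMem definition in X86.td through the subtarget into the memop fragment above, every SSE pattern built on memop can fold a load regardless of its alignment once the bit is set. Presumably it can be enabled per-invocation with the usual attribute syntax, e.g.:

llc -march=x86-64 -mattr=+vector-unaligned-mem test.ll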
@@ -142,6 +146,7 @@ public: bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } + bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } @@ -169,7 +174,7 @@ public: p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; - else if (isTargetCygMing() || isTargetWindows()) + else if (isTargetMingw() || isTargetWindows()) p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index da2fb04..383fd91 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -49,7 +49,7 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - SDNode *Select(SDValue Op); + SDNode *Select(SDNode *N); /// getI32Imm - Return a target constant with the specified value, of type /// i32. @@ -58,11 +58,11 @@ namespace { } // Complex Pattern Selectors. - bool SelectADDRspii(SDValue Op, SDValue Addr, SDValue &Base, + bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRdpii(SDValue Op, SDValue Addr, SDValue &Base, + bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRcpii(SDValue Op, SDValue Addr, SDValue &Base, + bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); virtual void InstructionSelect(); @@ -83,7 +83,7 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) { return new XCoreDAGToDAGISel(TM); } -bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr, +bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { FrameIndexSDNode *FIN = 0; if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) { @@ -105,7 +105,7 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr, return false; } -bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr, +bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) { Base = Addr.getOperand(0); @@ -126,7 +126,7 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr, return false; } -bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr, +bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) { Base = Addr.getOperand(0); @@ -156,8 +156,7 @@ void XCoreDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } -SDNode *XCoreDAGToDAGISel::Select(SDValue Op) { - SDNode *N = Op.getNode(); +SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT NVT = N->getValueType(0); if (NVT == MVT::i32) { @@ -185,7 +184,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) { // FIXME fold addition into the macc instruction SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, CurDAG->getTargetConstant(0, MVT::i32)), 0); - SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) }; + SDValue Ops[] = { Zero, Zero, N->getOperand(0), N->getOperand(1) }; SDNode *ResNode = CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, Ops, 4); 
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1)); @@ -196,7 +195,7 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) { // FIXME fold addition into the macc / lmul instruction SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, CurDAG->getTargetConstant(0, MVT::i32)), 0); - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Zero, Zero }; SDNode *ResNode = CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, Ops, 4); @@ -205,19 +204,19 @@ SDNode *XCoreDAGToDAGISel::Select(SDValue Op) { return NULL; } case XCoreISD::LADD: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, Ops, 3); } case XCoreISD::LSUB: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, Ops, 3); } // Other cases are autogenerated. } } - return SelectCode(Op); + return SelectCode(N); } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index dd5a6d8..d8190a4 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -147,7 +147,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); if (const StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { - DEBUG(errs() << "argpromotion disable promoting argument '" + DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); } else { @@ -409,7 +409,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // to do. 
if (ToPromote.find(Operands) == ToPromote.end()) { if (maxElements > 0 && ToPromote.size() == maxElements) { - DEBUG(errs() << "argpromotion not promoting argument '" + DEBUG(dbgs() << "argpromotion not promoting argument '" << Arg->getName() << "' because it would require adding more " << "than " << maxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number @@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->copyAttributesFrom(F); - DEBUG(errs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for @@ -808,7 +808,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); - DEBUG(errs() << "*** Promoted load of argument '" << I->getName() + DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); @@ -835,7 +835,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NewName += ".val"; TheArg->setName(NewName); - DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName() + DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index a3db836..1749b1e 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -425,7 +425,7 @@ void DAE::SurveyFunction(Function &F) { return; } - DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live. unsigned NumLiveRetVals = 0; @@ -488,7 +488,7 @@ void DAE::SurveyFunction(Function &F) { for (unsigned i = 0; i != RetCount; ++i) MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); - DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); // Now, check all of our arguments. unsigned i = 0; @@ -530,7 +530,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// mark any values that are used as this function's parameters or by its return /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { - DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); + DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. @@ -551,7 +551,7 @@ void DAE::MarkLive(const RetOrArg &RA) { if (!LiveValues.insert(RA).second) return; // We were already marked Live. 
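  // Liveness only moves in one direction: once a value enters LiveValues it
  // never leaves, so the early return above guarantees the propagation below
  // runs at most once per return value or argument.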
- DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n"); + DEBUG(dbgs() << "DAE - Marking " << RA.getDescription() << " live\n"); PropagateLiveness(RA); } @@ -616,7 +616,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NewRetIdxs[i] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; - DEBUG(errs() << "DAE - Removing return value " << i << " from " + DEBUG(dbgs() << "DAE - Removing return value " << i << " from " << F->getName() << "\n"); } } @@ -626,7 +626,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RetTypes.push_back(RetTy); NewRetIdxs[0] = 0; } else { - DEBUG(errs() << "DAE - Removing return value from " << F->getName() + DEBUG(dbgs() << "DAE - Removing return value from " << F->getName() << "\n"); ++NumRetValsEliminated; } @@ -681,7 +681,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); } else { ++NumArgumentsEliminated; - DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName() + DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName() << ") from " << F->getName() << "\n"); } } @@ -915,7 +915,7 @@ bool DAE::runOnModule(Module &M) { // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates // information computed while surveying other functions. - DEBUG(errs() << "DAE - Deleting dead varargs\n"); + DEBUG(dbgs() << "DAE - Deleting dead varargs\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { Function &F = *I++; if (F.getFunctionType()->isVarArg()) @@ -926,7 +926,7 @@ bool DAE::runOnModule(Module &M) { // We assume all arguments are dead unless proven otherwise (allowing us to // determine that dead arguments passed into recursive functions are dead). // - DEBUG(errs() << "DAE - Determining liveness\n"); + DEBUG(dbgs() << "DAE - Determining liveness\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) SurveyFunction(*I); diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index a16d335..64a6d78 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -79,16 +79,47 @@ Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } /// memory that is local to the function. Global constants are considered /// local to all functions. bool FunctionAttrs::PointsToLocalMemory(Value *V) { - V = V->getUnderlyingObject(); - // An alloca instruction defines local memory. - if (isa<AllocaInst>(V)) - return true; - // A global constant counts as local memory for our purposes. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - return GV->isConstant(); - // Could look through phi nodes and selects here, but it doesn't seem - // to be useful in practice. - return false; + SmallVector<Value*, 16> Worklist; + unsigned MaxLookup = 8; + + Worklist.push_back(V); + + do { + V = Worklist.pop_back_val()->getUnderlyingObject(); + + // An alloca instruction defines local memory. + if (isa<AllocaInst>(V)) + continue; + + // A global constant counts as local memory for our purposes. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (!GV->isConstant()) + return false; + continue; + } + + // If both select values point to local memory, then so does the select. 
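+ // e.g. 'select i1 %c, i32* %a, i32* %b' with both arms allocas is still
+ // local: each arm is queued on the worklist and must pass the checks above
+ // in turn.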
+ if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to local memory, then so does + // the phi. + if (PHINode *PN = dyn_cast<PHINode>(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) + return false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + + return false; + } while (!Worklist.empty() && --MaxLookup); + + return Worklist.empty(); } /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. @@ -136,6 +167,21 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Ignore calls to functions in the same SCC. if (SCCNodes.count(CS.getCalledFunction())) continue; + // Ignore intrinsics that only access local memory. + if (unsigned id = CS.getCalledFunction()->getIntrinsicID()) + if (AliasAnalysis::getModRefBehavior(id) == + AliasAnalysis::AccessesArguments) { + // Check that all pointer arguments point to local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (isa<PointerType>(Arg->getType()) && !PointsToLocalMemory(Arg)) + // Writes memory. Just give up. + return false; + } + // Only reads and writes local memory. + continue; + } } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { // Ignore loads from local memory. if (PointsToLocalMemory(LI->getPointerOperand())) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 1793bbf..ee260e9 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -544,7 +544,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -771,14 +771,14 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { } if (Changed) { - DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); + DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); ++NumGlobUses; } // If we nuked all of the loads, then none of the stores are needed either, // nor is the global. if (AllNonStoreUsesGone) { - DEBUG(errs() << " *** GLOBAL NOW DEAD!\n"); + DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); CleanupConstantGlobalUsers(GV, 0); if (GV->use_empty()) { GV->eraseFromParent(); @@ -815,7 +815,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, const Type *AllocTy, Value* NElems, TargetData* TD) { - DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); + DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); const Type *IntPtrTy = TD->getIntPtrType(GV->getContext()); @@ -1268,7 +1268,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// it up into multiple allocations of arrays of the fields. 
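/// For instance, a global that points at one malloc'd array of a two-field
/// struct becomes two globals, one array per field type, with every access
/// through the original pointer redirected to the matching field array.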
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value* NElems, TargetData *TD) { - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); + DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); @@ -1600,7 +1600,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) return false; - DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV); + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), @@ -1681,7 +1681,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->removeDeadConstantUsers(); if (GV->use_empty()) { - DEBUG(errs() << "GLOBAL DEAD: " << *GV); + DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); GV->eraseFromParent(); ++NumDeleted; return true; @@ -1689,26 +1689,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!AnalyzeGlobal(GV, GS, PHIUsers)) { #if 0 - DEBUG(errs() << "Global: " << *GV); - DEBUG(errs() << " isLoaded = " << GS.isLoaded << "\n"); - DEBUG(errs() << " StoredType = "); + DEBUG(dbgs() << "Global: " << *GV); + DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n"); + DEBUG(dbgs() << " StoredType = "); switch (GS.StoredType) { - case GlobalStatus::NotStored: DEBUG(errs() << "NEVER STORED\n"); break; - case GlobalStatus::isInitializerStored: DEBUG(errs() << "INIT STORED\n"); + case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break; + case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n"); break; - case GlobalStatus::isStoredOnce: DEBUG(errs() << "STORED ONCE\n"); break; - case GlobalStatus::isStored: DEBUG(errs() << "stored\n"); break; + case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break; + case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break; } if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - DEBUG(errs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); + DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(errs() << " AccessingFunction = " << GS.AccessingFunction->getName() + DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName() << "\n"); - DEBUG(errs() << " HasMultipleAccessingFunctions = " + DEBUG(dbgs() << " HasMultipleAccessingFunctions = " << GS.HasMultipleAccessingFunctions << "\n"); - DEBUG(errs() << " HasNonInstructionUser = " + DEBUG(dbgs() << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n"); - DEBUG(errs() << "\n"); + DEBUG(dbgs() << "\n"); #endif // If this is a first class global and has only one accessing function @@ -1726,7 +1726,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GS.AccessingFunction->getName() == "main" && GS.AccessingFunction->hasExternalLinkage() && GV->getType()->getAddressSpace() == 0) { - DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV); + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); const Type* ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment @@ -1743,7 +1743,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. 
// Delete it now. if (!GS.isLoaded) { - DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV); + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); // Delete any stores we can find to the global. We may not be able to // make it completely dead though. @@ -1758,7 +1758,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return Changed; } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DEBUG(errs() << "MARKING CONSTANT: " << *GV); + DEBUG(dbgs() << "MARKING CONSTANT: " << *GV); GV->setConstant(true); // Clean up any obviously simplifiable users now. @@ -1766,7 +1766,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is dead now, just nuke it. if (GV->use_empty()) { - DEBUG(errs() << " *** Marking constant allowed us to simplify " + DEBUG(dbgs() << " *** Marking constant allowed us to simplify " << "all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; @@ -1794,7 +1794,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, CleanupConstantGlobalUsers(GV, GV->getInitializer()); if (GV->use_empty()) { - DEBUG(errs() << " *** Substituting initializer allowed us to " + DEBUG(dbgs() << " *** Substituting initializer allowed us to " << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; @@ -1925,11 +1925,11 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0; + !STy->getElementType(0)->isInteger(32)) return 0; const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) || + if (!FTy || !FTy->getReturnType()->isVoidTy() || FTy->isVarArg() || FTy->getNumParams() != 0) return 0; @@ -2091,8 +2091,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, return Val; } + std::vector<Constant*> Elts; if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { - std::vector<Constant*> Elts; // Break up the constant into its elements. if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { @@ -2120,28 +2120,38 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, STy->isPacked()); } else { ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); - const ArrayType *ATy = cast<ArrayType>(Init->getType()); + const SequentialType *InitTy = cast<SequentialType>(Init->getType()); + uint64_t NumElts; + if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = cast<VectorType>(InitTy)->getNumElements(); + + // Break up the array into elements. 
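+ // Init may be a ConstantArray, ConstantVector, ConstantAggregateZero, or
+ // UndefValue; each case below materializes NumElts elements so that one
+ // slot can be overwritten and the aggregate rebuilt with the store folded in.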
- std::vector<Constant*> Elts; if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) Elts.push_back(cast<Constant>(*i)); + } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { + for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); } else if (isa<ConstantAggregateZero>(Init)) { - Constant *Elt = Constant::getNullValue(ATy->getElementType()); - Elts.assign(ATy->getNumElements(), Elt); - } else if (isa<UndefValue>(Init)) { - Constant *Elt = UndefValue::get(ATy->getElementType()); - Elts.assign(ATy->getNumElements(), Elt); + Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); } else { - llvm_unreachable("This code is out of sync with " + assert(isa<UndefValue>(Init) && "This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); + Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); } - assert(CI->getZExtValue() < ATy->getNumElements()); + assert(CI->getZExtValue() < NumElts); Elts[CI->getZExtValue()] = EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - return ConstantArray::get(ATy, Elts); + + if (isa<ArrayType>(Init->getType())) + return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + else + return ConstantVector::get(&Elts[0], Elts.size()); } } @@ -2153,13 +2163,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { GV->setInitializer(Val); return; } - + ConstantExpr *CE = cast<ConstantExpr>(Addr); GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); - - Constant *Init = GV->getInitializer(); - Init = EvaluateStoreInto(Init, Val, CE, 2); - GV->setInitializer(Init); + GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); } /// ComputeLoadResult - Return the value that would be computed by a load from @@ -2402,7 +2409,7 @@ static bool EvaluateStaticConstructor(Function *F) { MutatedMemory, AllocaTmps); if (EvalSuccess) { // We succeeded at evaluation: commit the result. - DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " << MutatedMemory.size() << " stores.\n"); for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 6918fe8..5725db1 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -147,7 +147,7 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success! 
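  // That is, every use of AI is rewritten to AvailableAlloca and AI itself is
  // deleted, so repeated inlining reuses one stack slot for identically-typed
  // arrays instead of accumulating a new one per inlined call site.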
- DEBUG(errs() << " ***MERGED ALLOCA: " << *AI); + DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI); AI->replaceAllUsesWith(AvailableAlloca); AI->eraseFromParent(); @@ -178,13 +178,13 @@ bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); if (IC.isAlways()) { - DEBUG(errs() << " Inlining: cost=always" + DEBUG(dbgs() << " Inlining: cost=always" << ", Call: " << *CS.getInstruction() << "\n"); return true; } if (IC.isNever()) { - DEBUG(errs() << " NOT Inlining: cost=never" + DEBUG(dbgs() << " NOT Inlining: cost=never" << ", Call: " << *CS.getInstruction() << "\n"); return false; } @@ -200,7 +200,7 @@ bool Inliner::shouldInline(CallSite CS) { float FudgeFactor = getInlineFudgeFactor(CS); if (Cost >= (int)(CurrentThreshold * FudgeFactor)) { - DEBUG(errs() << " NOT Inlining: cost=" << Cost + DEBUG(dbgs() << " NOT Inlining: cost=" << Cost << ", Call: " << *CS.getInstruction() << "\n"); return false; } @@ -263,14 +263,14 @@ bool Inliner::shouldInline(CallSite CS) { if (outerCallsFound && someOuterCallWouldNotBeInlined && TotalSecondaryCost < Cost) { - DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() << + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << Cost << ", outer Cost = " << TotalSecondaryCost << '\n'); return false; } } - DEBUG(errs() << " Inlining: cost=" << Cost + DEBUG(dbgs() << " Inlining: cost=" << Cost << ", Call: " << *CS.getInstruction() << '\n'); return true; } @@ -280,11 +280,11 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { const TargetData *TD = getAnalysisIfAvailable<TargetData>(); SmallPtrSet<Function*, 8> SCCFunctions; - DEBUG(errs() << "Inliner visiting SCC:"); + DEBUG(dbgs() << "Inliner visiting SCC:"); for (unsigned i = 0, e = SCC.size(); i != e; ++i) { Function *F = SCC[i]->getFunction(); if (F) SCCFunctions.insert(F); - DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE")); + DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); } // Scan through and identify all call sites ahead of time so that we only @@ -314,7 +314,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { } } - DEBUG(errs() << ": " << CallSites.size() << " call sites.\n"); + DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. @@ -346,7 +346,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. if (isInstructionTriviallyDead(CS.getInstruction())) { - DEBUG(errs() << " -> Deleting dead call: " + DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. CG[Caller]->removeCallEdgeFor(CS); @@ -377,7 +377,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // callgraph references to the node, we cannot delete it yet, this // could invalidate the CGSCC iterator. 
CG[Callee]->getNumReferences() == 0) { - DEBUG(errs() << " -> Deleting dead function: " + DEBUG(dbgs() << " -> Deleting dead function: " << Callee->getName() << "\n"); CallGraphNode *CalleeNode = CG[Callee]; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 20ae0d5..3d31932 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -131,7 +131,7 @@ bool InternalizePass::runOnModule(Module &M) { if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); Changed = true; ++NumFunctions; - DEBUG(errs() << "Internalizing func " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); } // Never internalize the llvm.used symbol. It is used to implement @@ -160,7 +160,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumGlobals; - DEBUG(errs() << "Internalized gvar " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); } // Mark all aliases that are not in the api as internal as well. @@ -171,7 +171,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumAliases; - DEBUG(errs() << "Internalized alias " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); } return Changed; diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b2bdabc..fa8845b 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -498,7 +498,7 @@ static void ThunkGToF(Function *F, Function *G) { CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); - if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) { + if (NewG->getReturnType()->isVoidTy()) { ReturnInst::Create(F->getContext(), BB); } else if (CI->getType() != NewG->getReturnType()) { Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); @@ -633,17 +633,17 @@ bool MergeFunctions::runOnModule(Module &M) { bool LocalChanged; do { LocalChanged = false; - DEBUG(errs() << "size: " << FnMap.size() << "\n"); + DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); for (std::map<unsigned long, std::vector<Function *> >::iterator I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector<Function *> &FnVec = I->second; - DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); + DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); for (int i = 0, e = FnVec.size(); i != e; ++i) { for (int j = i + 1; j != e; ++j) { bool isEqual = equals(FnVec[i], FnVec[j]); - DEBUG(errs() << " " << FnVec[i]->getName() + DEBUG(dbgs() << " " << FnVec[i]->getName() << (isEqual ? 
" == " : " != ") << FnVec[j]->getName() << "\n"); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index b955b97..f40902f 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -145,7 +145,7 @@ bool PartialInliner::runOnModule(Module& M) { worklist.reserve(M.size()); for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) if (!FI->use_empty() && !FI->isDeclaration()) - worklist.push_back(&*FI); + worklist.push_back(&*FI); bool changed = false; while (!worklist.empty()) { diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index 67fc934..dda32d0 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -93,11 +93,10 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) return 0; - DEBUG(errs() << "SretPromotion: Looking at sret function " + DEBUG(dbgs() << "SretPromotion: Looking at sret function " << F->getName() << "\n"); - assert(F->getReturnType() == Type::getVoidTy(F->getContext()) && - "Invalid function return type"); + assert(F->getReturnType()->isVoidTy() && "Invalid function return type"); Function::arg_iterator AI = F->arg_begin(); const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType()); assert(FArgType && "Invalid sret parameter type"); @@ -107,12 +106,12 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { // Check if it is ok to perform this promotion. if (isSafeToUpdateAllCallers(F) == false) { - DEBUG(errs() << "SretPromotion: Not all callers can be updated\n"); + DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); NumRejectedSRETUses++; return 0; } - DEBUG(errs() << "SretPromotion: sret argument will be promoted\n"); + DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); NumSRET++; // [1] Replace use of sret parameter AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", @@ -358,7 +357,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) { unsigned Num = STy->getNumElements(); for (unsigned i = 0; i < Num; i++) { const Type *Ty = STy->getElementType(i); - if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext())) + if (!Ty->isSingleValueType() && !Ty->isVoidTy()) return true; } return false; diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt new file mode 100644 index 0000000..5b1ff3e --- /dev/null +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -0,0 +1,17 @@ +add_llvm_library(LLVMInstCombine + InstructionCombining.cpp + InstCombineAddSub.cpp + InstCombineAndOrXor.cpp + InstCombineCalls.cpp + InstCombineCasts.cpp + InstCombineCompares.cpp + InstCombineLoadStoreAlloca.cpp + InstCombineMulDivRem.cpp + InstCombinePHI.cpp + InstCombineSelect.cpp + InstCombineShifts.cpp + InstCombineSimplifyDemanded.cpp + InstCombineVectorOps.cpp + ) + +target_link_libraries (LLVMInstCombine LLVMTransformUtils) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h new file mode 100644 index 0000000..5367900 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -0,0 +1,349 @@ +//===- InstCombine.h - Main InstCombine pass definition -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef INSTCOMBINE_INSTCOMBINE_H +#define INSTCOMBINE_INSTCOMBINE_H + +#include "InstCombineWorklist.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/TargetFolder.h" + +namespace llvm { + class CallSite; + class TargetData; + class DbgDeclareInst; + class MemIntrinsic; + class MemSetInst; + +/// SelectPatternFlavor - We can match a variety of different patterns for +/// select operations. +enum SelectPatternFlavor { + SPF_UNKNOWN = 0, + SPF_SMIN, SPF_UMIN, + SPF_SMAX, SPF_UMAX + //SPF_ABS - TODO. +}; + +/// getComplexity: Assign a complexity or rank value to LLVM Values... +/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst +static inline unsigned getComplexity(Value *V) { + if (isa<Instruction>(V)) { + if (BinaryOperator::isNeg(V) || + BinaryOperator::isFNeg(V) || + BinaryOperator::isNot(V)) + return 3; + return 4; + } + if (isa<Argument>(V)) return 3; + return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; +} + + +/// InstCombineIRInserter - This is an IRBuilder insertion helper that works +/// just like the normal insertion helper, but also adds any new instructions +/// to the instcombine worklist. +class VISIBILITY_HIDDEN InstCombineIRInserter + : public IRBuilderDefaultInserter<true> { + InstCombineWorklist &Worklist; +public: + InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} + + void InsertHelper(Instruction *I, const Twine &Name, + BasicBlock *BB, BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); + Worklist.Add(I); + } +}; + +/// InstCombiner - The -instcombine pass. +class VISIBILITY_HIDDEN InstCombiner + : public FunctionPass, + public InstVisitor<InstCombiner, Instruction*> { + TargetData *TD; + bool MustPreserveLCSSA; + bool MadeIRChange; +public: + /// Worklist - All of the instructions that need to be simplified. + InstCombineWorklist Worklist; + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. + typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; + BuilderTy *Builder; + + static char ID; // Pass identification, replacement for typeid + InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} + +public: + virtual bool runOnFunction(Function &F); + + bool DoOneIteration(Function &F, unsigned ItNum); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + TargetData *getTargetData() const { return TD; } + + // Visitation implementation - Implement instruction combining for different + // instruction types. 
The semantics are as follows: + // Return Value: + // null - No change was made + // I - Change was made, I is still valid, I may be dead though + // otherwise - Change was made, replace I with returned instruction + // + Instruction *visitAdd(BinaryOperator &I); + Instruction *visitFAdd(BinaryOperator &I); + Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); + Instruction *visitSub(BinaryOperator &I); + Instruction *visitFSub(BinaryOperator &I); + Instruction *visitMul(BinaryOperator &I); + Instruction *visitFMul(BinaryOperator &I); + Instruction *visitURem(BinaryOperator &I); + Instruction *visitSRem(BinaryOperator &I); + Instruction *visitFRem(BinaryOperator &I); + bool SimplifyDivRemOfSelect(BinaryOperator &I); + Instruction *commonRemTransforms(BinaryOperator &I); + Instruction *commonIRemTransforms(BinaryOperator &I); + Instruction *commonDivTransforms(BinaryOperator &I); + Instruction *commonIDivTransforms(BinaryOperator &I); + Instruction *visitUDiv(BinaryOperator &I); + Instruction *visitSDiv(BinaryOperator &I); + Instruction *visitFDiv(BinaryOperator &I); + Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); + Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Instruction *visitAnd(BinaryOperator &I); + Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); + Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, + Value *A, Value *B, Value *C); + Instruction *visitOr (BinaryOperator &I); + Instruction *visitXor(BinaryOperator &I); + Instruction *visitShl(BinaryOperator &I); + Instruction *visitAShr(BinaryOperator &I); + Instruction *visitLShr(BinaryOperator &I); + Instruction *commonShiftTransforms(BinaryOperator &I); + Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, + Constant *RHSC); + Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, + GlobalVariable *GV, CmpInst &ICI, + ConstantInt *AndCst = 0); + Instruction *visitFCmpInst(FCmpInst &I); + Instruction *visitICmpInst(ICmpInst &I); + Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); + Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, + Instruction *LHS, + ConstantInt *RHS); + Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); + Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred, Value *TheAdd); + Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, Instruction &I); + Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + BinaryOperator &I); + Instruction *commonCastTransforms(CastInst &CI); + Instruction *commonPointerCastTransforms(CastInst &CI); + Instruction *visitTrunc(TruncInst &CI); + Instruction *visitZExt(ZExtInst &CI); + Instruction *visitSExt(SExtInst &CI); + Instruction *visitFPTrunc(FPTruncInst &CI); + Instruction *visitFPExt(CastInst &CI); + Instruction *visitFPToUI(FPToUIInst &FI); + Instruction *visitFPToSI(FPToSIInst &FI); + Instruction *visitUIToFP(CastInst &CI); + Instruction *visitSIToFP(CastInst &CI); + Instruction *visitPtrToInt(PtrToIntInst &CI); + Instruction *visitIntToPtr(IntToPtrInst &CI); + Instruction *visitBitCast(BitCastInst &CI); + Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI); + Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); + Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, 
+ Value *A, Value *B, Instruction &Outer, + SelectPatternFlavor SPF2, Value *C); + Instruction *visitSelectInst(SelectInst &SI); + Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); + Instruction *visitCallInst(CallInst &CI); + Instruction *visitInvokeInst(InvokeInst &II); + + Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); + Instruction *visitPHINode(PHINode &PN); + Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); + Instruction *visitAllocaInst(AllocaInst &AI); + Instruction *visitFree(Instruction &FI); + Instruction *visitLoadInst(LoadInst &LI); + Instruction *visitStoreInst(StoreInst &SI); + Instruction *visitBranchInst(BranchInst &BI); + Instruction *visitSwitchInst(SwitchInst &SI); + Instruction *visitInsertElementInst(InsertElementInst &IE); + Instruction *visitExtractElementInst(ExtractElementInst &EI); + Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); + Instruction *visitExtractValueInst(ExtractValueInst &EV); + + // visitInstruction - Specify what to return for unhandled instructions... + Instruction *visitInstruction(Instruction &I) { return 0; } + +private: + bool ShouldChangeType(const Type *From, const Type *To) const; + Value *dyn_castNegVal(Value *V) const; + Value *dyn_castFNegVal(Value *V) const; + const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, + SmallVectorImpl<Value*> &NewIndices); + Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); + + /// ValueRequiresCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated. It does not require codegen if V is + /// simple enough or if the cast can be folded into other casts. + bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty); + + Instruction *visitCallSite(CallSite CS); + bool transformConstExprCastCall(CallSite CS); + Instruction *transformCallThroughTrampoline(CallSite CS); + Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, + bool DoXform = true); + bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); + DbgDeclareInst *hasOneUsePlusDeclare(Value *V); + Value *EmitGEPOffset(User *GEP); + +public: + // InsertNewInstBefore - insert an instruction New before instruction Old + // in the program. Add the new instruction to the worklist. + // + Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { + assert(New && New->getParent() == 0 && + "New instruction already inserted into a basic block!"); + BasicBlock *BB = Old.getParent(); + BB->getInstList().insert(&Old, New); // Insert inst + Worklist.Add(New); + return New; + } + + // ReplaceInstUsesWith - This method is to be used when an instruction is + // found to be dead, replacable with another preexisting expression. Here + // we add all uses of I to the worklist, replace all uses of I with the new + // value, then return I, so that the inst combiner will know that I was + // modified. + // + Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. + + // If we are replacing the instruction with itself, this must be in a + // segment of unreachable code, so just clobber the instruction. + if (&I == V) + V = UndefValue::get(I.getType()); + + I.replaceAllUsesWith(V); + return &I; + } + + // EraseInstFromFunction - When dealing with an instruction that has side + // effects or produces a void value, we can't rely on DCE to delete the + // instruction. Instead, visit methods should return the value returned by + // this function. 
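+ // A typical use is 'return EraseInstFromFunction(I);' from a visit method:
+ // the null result it forwards means "no replacement instruction", while the
+ // worklist maintenance and MadeIRChange flag inside record the deletion.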
+ Instruction *EraseInstFromFunction(Instruction &I) { + DEBUG(errs() << "IC: ERASE " << I << '\n'); + + assert(I.use_empty() && "Cannot erase instruction that is used!"); + // Make sure that we reprocess all operands now that we reduced their + // use counts. + if (I.getNumOperands() < 8) { + for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(*i)) + Worklist.Add(Op); + } + Worklist.Remove(&I); + I.eraseFromParent(); + MadeIRChange = true; + return 0; // Don't do anything with FI + } + + void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth = 0) const { + return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + } + + bool MaskedValueIsZero(Value *V, const APInt &Mask, + unsigned Depth = 0) const { + return llvm::MaskedValueIsZero(V, Mask, TD, Depth); + } + unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const { + return llvm::ComputeNumSignBits(Op, TD, Depth); + } + +private: + + /// SimplifyCommutative - This performs a few simplifications for + /// commutative operators. + bool SimplifyCommutative(BinaryOperator &I); + + /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value + /// based on the demanded bits. + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth); + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, + APInt& KnownZero, APInt& KnownOne, + unsigned Depth=0); + + /// SimplifyDemandedInstructionBits - Inst is an integer instruction that + /// SimplifyDemandedBits knows about. See if the instruction has any + /// properties that allow us to simplify its operands. + bool SimplifyDemandedInstructionBits(Instruction &Inst); + + Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, + APInt& UndefElts, unsigned Depth = 0); + + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select + // which has a PHI node as operand #0, see if we can fold the instruction + // into the PHI (which is only possible if all operands to the PHI are + // constants). + // + // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms + // that would normally be unprofitable because they strongly encourage jump + // threading. + Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); + + // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" + // operator and they all are only used by the PHI, PHI together their + // inputs, and do the operation once, to the result of the PHI. 
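+ // For example, 'phi [(sext %a), BB1], [(sext %b), BB2]', where each sext is
+ // only used by the phi, becomes 'sext (phi [%a, BB1], [%b, BB2])', doing the
+ // unary operation once instead of once per predecessor.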
+ Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); + Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); + Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); + Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); + + + Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, + ConstantInt *AndRHS, BinaryOperator &TheAnd); + + Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, + bool isSub, Instruction &I); + Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside, Instruction &IB); + Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); + Instruction *MatchBSwap(BinaryOperator &I); + bool SimplifyStoreAtEndOfBlock(StoreInst &SI); + Instruction *SimplifyMemTransfer(MemIntrinsic *MI); + Instruction *SimplifyMemSet(MemSetInst *MI); + + + Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); + + unsigned GetOrEnforceKnownAlignment(Value *V, + unsigned PrefAlign = 0); + +}; + + + +} // end namespace llvm. + +#endif diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp new file mode 100644 index 0000000..4891ff0 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -0,0 +1,740 @@ +//===- InstCombineAddSub.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for add, fadd, sub, and fsub. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// AddOne - Add one to a ConstantInt. +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +} +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + + +// dyn_castFoldableMul - If this value is a multiply that can be folded into +// other computations (because it has a constant operand), return the +// non-constant operand of the multiply, and set CST to point to the multiplier. +// Otherwise, return null. +// +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { + if (!V->hasOneUse() || !V->getType()->isInteger()) + return 0; + + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return 0; + + if (I->getOpcode() == Instruction::Mul) + if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) + return I->getOperand(0); + if (I->getOpcode() == Instruction::Shl) + if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { + // The multiplier is really 1 << CST. 
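+ // e.g. for 'shl i32 %x, 3' the returned operand is %x and CST is rewritten
+ // from 3 to 8, so callers can treat the shift uniformly as '%x * 8'.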
+ uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + uint32_t CSTVal = CST->getLimitedValue(BitWidth); + CST = ConstantInt::get(V->getType()->getContext(), + APInt(BitWidth, 1).shl(CSTVal)); + return I->getOperand(0); + } + return 0; +} + + +/// WillNotOverflowSignedAdd - Return true if we can prove that: +/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) +/// This basically requires proving that the add in the original type would not +/// overflow to change the sign bit or have a carry out. +bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { + // There are different heuristics we can use for this. Here are some simple + // ones. + + // Add has the property that adding any two 2's complement numbers can only + // have one carry bit which can change a sign. As such, if LHS and RHS each + // have at least two sign bits, we know that the addition of the two values + // will sign extend fine. + if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) + return true; + + + // If one of the operands only has one non-zero bit, and if the other operand + // has a known-zero bit in a more significant place than it (not including the + // sign bit) the ripple may go up to and fill the zero, but won't change the + // sign. For example, (X & ~4) + 1. + + // TODO: Implement. + + return false; +} + +Instruction *InstCombiner::visitAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) { + // X + (signbit) --> X ^ signbit + const APInt& Val = CI->getValue(); + uint32_t BitWidth = Val.getBitWidth(); + if (Val == APInt::getSignBit(BitWidth)) + return BinaryOperator::CreateXor(LHS, RHS); + + // See if SimplifyDemandedBits can simplify this. This handles stuff like + // (X & 254)+1 -> (X&254)|1 + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // zext(bool) + C -> bool ? C + 1 : C + if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) + if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); + } + + if (isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + ConstantInt *XorRHS = 0; + Value *XorLHS = 0; + if (isa<ConstantInt>(RHSC) && + match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { + uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); + const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); + + uint32_t Size = TySizeBits / 2; + APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); + APInt CFF80Val(-C0080Val); + do { + if (TySizeBits > Size) { + // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. + // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. + if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || + (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { + // This is a sign extend if the top bits are known zero. + if (!MaskedValueIsZero(XorLHS, + APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) + Size = 0; // Not a sign ext, but can't be any others either. + break; + } + } + Size >>= 1; + C0080Val = APIntOps::lshr(C0080Val, Size); + CFF80Val = APIntOps::ashr(CFF80Val, Size); + } while (Size >= 1); + + // FIXME: This shouldn't be necessary. 
When the backends can handle types + // with funny bit widths then this switch statement should be removed. It + // is just here to get the size of the "middle" type back up to something + // that the back ends can handle. + const Type *MiddleType = 0; + switch (Size) { + default: break; + case 32: + case 16: + case 8: MiddleType = IntegerType::get(I.getContext(), Size); break; + } + if (MiddleType) { + Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); + return new SExtInst(NewTrunc, I.getType(), I.getName()); + } + } + } + + if (I.getType()->isInteger(1)) + return BinaryOperator::CreateXor(LHS, RHS); + + if (I.getType()->isInteger()) { + // X + X --> X << 1 + if (LHS == RHS) + return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); + + if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { + if (RHSI->getOpcode() == Instruction::Sub) + if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B + return ReplaceInstUsesWith(I, RHSI->getOperand(0)); + } + if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) { + if (LHSI->getOpcode() == Instruction::Sub) + if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B + return ReplaceInstUsesWith(I, LHSI->getOperand(0)); + } + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castNegVal(LHS)) { + if (LHS->getType()->isIntOrIntVector()) { + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); + } + } + + return BinaryOperator::CreateSub(RHS, LHSV); + } + + // A + -B --> A - B + if (!isa<Constant>(RHS)) + if (Value *V = dyn_castNegVal(RHS)) + return BinaryOperator::CreateSub(LHS, V); + + + ConstantInt *C2; + if (Value *X = dyn_castFoldableMul(LHS, C2)) { + if (X == RHS) // X*C + X --> X * (C+1) + return BinaryOperator::CreateMul(RHS, AddOne(C2)); + + // X*C1 + X*C2 --> X * (C1+C2) + ConstantInt *C1; + if (X == dyn_castFoldableMul(RHS, C1)) + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); + } + + // X + X*C --> X * (C+1) + if (dyn_castFoldableMul(RHS, C2) == LHS) + return BinaryOperator::CreateMul(LHS, AddOne(C2)); + + // X + ~X --> -1 since ~X = -X-1 + if (match(LHS, m_Not(m_Specific(RHS))) || + match(RHS, m_Not(m_Specific(LHS)))) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); + + // A+B --> A|B iff A and B have no bits set in common. + if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { + APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + if (LHSKnownZero != 0) { + APInt RHSKnownOne(IT->getBitWidth(), 0); + APInt RHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // No bits in common -> bitwise or. 
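+ // e.g. with LHS = 'and i32 %x, 15' and RHS = 'and i32 %y, -16', every bit
+ // position is known zero on at least one side, so the addition can never
+ // produce a carry and is exactly equivalent to the bitwise or.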
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateOr(LHS, RHS); + } + } + + // W*X + Y*Z --> W * (X+Z) iff W == Y + if (I.getType()->isIntOrIntVector()) { + Value *W, *X, *Y, *Z; + if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && + match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { + if (W != Y) { + if (W == Z) { + std::swap(Y, Z); + } else if (Y == X) { + std::swap(W, X); + } else if (X == Z) { + std::swap(Y, Z); + std::swap(W, X); + } + } + + if (W == Y) { + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); + return BinaryOperator::CreateMul(W, NewAdd); + } + } + } + + if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { + Value *X = 0; + if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X + return BinaryOperator::CreateSub(SubOne(CRHS), X); + + // (X & FF00) + xx00 -> (X+xx00) & FF00 + if (LHS->hasOneUse() && + match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { + Constant *Anded = ConstantExpr::getAnd(CRHS, C2); + if (Anded == CRHS) { + // See if all bits from the first bit set in the Add RHS up are included + // in the mask. First, get the rightmost bit. + const APInt &AddRHSV = CRHS->getValue(); + + // Form a mask of all bits from the lowest bit added through the top. + APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); + + // See if the and mask includes all of these bits. + APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); + + if (AddRHSHighBits == AddRHSHighBitsAnd) { + // Okay, the xform is safe. Insert the new add pronto. + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + return BinaryOperator::CreateAnd(NewAdd, C2); + } + } + } + + // Try to fold constant add into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + } + + // add (select X 0 (sub n A)) A --> select X A n + { + SelectInst *SI = dyn_cast<SelectInst>(LHS); + Value *A = RHS; + if (!SI) { + SI = dyn_cast<SelectInst>(RHS); + A = LHS; + } + if (SI && SI->hasOneUse()) { + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); + Value *N; + + // Can we fold the add into the argument of the select? + // We check both true and false select arguments for a matching subtract. + if (match(FV, m_Zero()) && + match(TV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the true select value. + return SelectInst::Create(SI->getCondition(), N, A); + if (match(TV, m_Zero()) && + match(FV, m_Sub(m_Value(N), m_Specific(A)))) + // Fold the add into the false select value. + return SelectInst::Create(SI->getCondition(), A, N); + } + } + + // Check for (add (sext x), y), see if we can merge this into an + // integer add followed by a sext. + if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { + // (add (sext x), cst) --> (sext (add x, cst')) + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { + Constant *CI = + ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSExt(CI, I.getType()) == RHSC && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new, smaller add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SExtInst(NewAdd, I.getType()); + } + } + + // (add (sext x), (sext y)) --> (sext (add int x, y)) + if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of sexts), and if the + // integer add will not overflow. 
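+ // The overflow proof runs on the narrow operands: e.g. if %a and %b are i8
+ // values that each carry at least two sign bits (say, both produced by
+ // 'ashr i8 %v, 1'), an i8 add cannot flip the sign, so the pair of sexts
+ // collapses into a single sext of a narrow add.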
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); + return new SExtInst(NewAdd, I.getType()); + } + } + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // X + 0 --> X + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { + if (CFP->isExactlyValue(ConstantFP::getNegativeZero + (I.getType())->getValueAPF())) + return ReplaceInstUsesWith(I, LHS); + } + + if (isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castFNegVal(LHS)) + return BinaryOperator::CreateFSub(RHS, LHSV); + + // A + -B --> A - B + if (!isa<Constant>(RHS)) + if (Value *V = dyn_castFNegVal(RHS)) + return BinaryOperator::CreateFSub(LHS, V); + + // Check for X+0.0. Simplify it to X if we know X is not -0.0. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) + if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) + return ReplaceInstUsesWith(I, LHS); + + // Check for (add double (sitofp x), y), see if we can merge this into an + // integer add followed by a promotion. + if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { + // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) + // ... if the constant fits in the integer value. This is useful for things + // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer + // requires a constant pool load, and generally allows the add to be better + // instcombined. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } + } + + // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) + if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { + // Only do this if x/y have the same type, if at last one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0))) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0),"addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } + } + } + + return Changed ? &I : 0; +} + + +/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the +/// code necessary to compute the offset from the base pointer (without adding +/// in the base pointer). Return the result as a signed integer of intptr size. 
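+/// For example, given the hypothetical 'struct S { int a; double b[4]; }'
+/// under a typical layout, 'gep %struct.S* %p, 0, 1, %i' emits 8 + 8 * %i:
+/// the leading zero index contributes nothing, field 1 adds its struct
+/// offset of 8, and the array index is scaled by sizeof(double).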
+Value *InstCombiner::EmitGEPOffset(User *GEP) {
+  TargetData &TD = *getTargetData();
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
+  Value *Result = Constant::getNullValue(IntPtrTy);
+
+  // Build a mask for high order bits.
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+       ++i, ++GTI) {
+    Value *Op = *i;
+    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+      if (OpC->isZero()) continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+        Result = Builder->CreateAdd(Result,
+                                    ConstantInt::get(IntPtrTy, Size),
+                                    GEP->getName()+".offs");
+        continue;
+      }
+
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      Constant *OC =
+              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = ConstantExpr::getMul(OC, Scale);
+      // Emit an add instruction.
+      Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
+      continue;
+    }
+    // Convert to correct type.
+    if (Op->getType() != IntPtrTy)
+      Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
+    if (Size != 1) {
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      // We'll let instcombine(mul) convert this to a shl if possible.
+      Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
+    }
+
+    // Emit an add instruction.
+    Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
+  }
+  return Result;
+}
+
+
+/// Optimize pointer differences in the same array into a size. Consider:
+/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
+/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
+///
+Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
+                                               const Type *Ty) {
+  assert(TD && "Must have target data info for this");
+
+  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
+  // this.
+  bool Swapped = false;
+  GetElementPtrInst *GEP = 0;
+  ConstantExpr *CstGEP = 0;
+
+  // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
+  // For now we require one side to be the base pointer "A" or a constant
+  // expression derived from it.
+  if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
+    // (gep X, ...) - X
+    if (LHSGEP->getOperand(0) == RHS) {
+      GEP = LHSGEP;
+      Swapped = false;
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
+      // (gep X, ...) - (ce_gep X, ...)
+      if (CE->getOpcode() == Instruction::GetElementPtr &&
+          LHSGEP->getOperand(0) == CE->getOperand(0)) {
+        CstGEP = CE;
+        GEP = LHSGEP;
+        Swapped = false;
+      }
+    }
+  }
+
+  if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
+    // X - (gep X, ...)
+    if (RHSGEP->getOperand(0) == LHS) {
+      GEP = RHSGEP;
+      Swapped = true;
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
+      // (ce_gep X, ...) - (gep X, ...)
+      if (CE->getOpcode() == Instruction::GetElementPtr &&
+          RHSGEP->getOperand(0) == CE->getOperand(0)) {
+        CstGEP = CE;
+        GEP = RHSGEP;
+        Swapped = true;
+      }
+    }
+  }
+
+  if (GEP == 0)
+    return 0;
+
+  // Emit the offset of the GEP as an intptr_t.
+ Value *Result = EmitGEPOffset(GEP); + + // If we had a constant expression GEP on the other side offsetting the + // pointer, subtract it from the offset we have. + if (CstGEP) { + Value *CstOffset = EmitGEPOffset(CstGEP); + Result = Builder->CreateSub(Result, CstOffset); + } + + + // If we have p - gep(p, ...) then we have to negate the result. + if (Swapped) + Result = Builder->CreateNeg(Result, "diff.neg"); + + return Builder->CreateIntCast(Result, Ty, true); +} + + +Instruction *InstCombiner::visitSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Op0 == Op1) // sub X, X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. + if (Value *V = dyn_castNegVal(Op1)) { + BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); + Res->setHasNoSignedWrap(I.hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return Res; + } + + if (isa<UndefValue>(Op0)) + return ReplaceInstUsesWith(I, Op0); // undef - X -> undef + if (isa<UndefValue>(Op1)) + return ReplaceInstUsesWith(I, Op1); // X - undef -> undef + if (I.getType()->isInteger(1)) + return BinaryOperator::CreateXor(Op0, Op1); + + if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { + // Replace (-1 - A) with (~A). + if (C->isAllOnesValue()) + return BinaryOperator::CreateNot(Op1); + + // C - ~X == X + (1+C) + Value *X = 0; + if (match(Op1, m_Not(m_Value(X)))) + return BinaryOperator::CreateAdd(X, AddOne(C)); + + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (C->isZero()) { + if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) { + if (SI->getOpcode() == Instruction::LShr) { + if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert AShr. + return BinaryOperator::Create(Instruction::AShr, + SI->getOperand(0), CU, SI->getName()); + } + } + } else if (SI->getOpcode() == Instruction::AShr) { + if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { + // Check to see if we are shifting out everything but the sign bit. + if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == + SI->getType()->getPrimitiveSizeInBits()-1) { + // Ok, the transformation is safe. Insert LShr. + return BinaryOperator::CreateLShr( + SI->getOperand(0), CU, SI->getName()); + } + } + } + } + } + + // Try to fold constant sub into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + // C - zext(bool) -> bool ? 
C - 1 : C + if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) + if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext())) + return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); + } + + if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { + if (Op1I->getOpcode() == Instruction::Add) { + if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(1), + I.getName()); + else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y + return BinaryOperator::CreateNeg(Op1I->getOperand(0), + I.getName()); + else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { + if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) + // C1-(X+C2) --> (C1-C2)-X + return BinaryOperator::CreateSub( + ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); + } + } + + if (Op1I->hasOneUse()) { + // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression + // is not used by anyone else... + // + if (Op1I->getOpcode() == Instruction::Sub) { + // Swap the two operands of the subexpr... + Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); + Op1I->setOperand(0, IIOp1); + Op1I->setOperand(1, IIOp0); + + // Create the new top level add instruction... + return BinaryOperator::CreateAdd(Op0, Op1); + } + + // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... + // + if (Op1I->getOpcode() == Instruction::And && + (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { + Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); + + Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); + return BinaryOperator::CreateAnd(Op0, NewNot); + } + + // 0 - (X sdiv C) -> (X sdiv -C) + if (Op1I->getOpcode() == Instruction::SDiv) + if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) + if (CSI->isZero()) + if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1))) + return BinaryOperator::CreateSDiv(Op1I->getOperand(0), + ConstantExpr::getNeg(DivRHS)); + + // X - X*C --> X * (1-C) + ConstantInt *C2 = 0; + if (dyn_castFoldableMul(Op1I, C2) == Op0) { + Constant *CP1 = + ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), + C2); + return BinaryOperator::CreateMul(Op0, CP1); + } + } + } + + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { + if (Op0I->getOpcode() == Instruction::Add) { + if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(1)); + else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(0)); + } else if (Op0I->getOpcode() == Instruction::Sub) { + if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y + return BinaryOperator::CreateNeg(Op0I->getOperand(1), + I.getName()); + } + } + + ConstantInt *C1; + if (Value *X = dyn_castFoldableMul(Op0, C1)) { + if (X == Op1) // X*C - X --> X * (C-1) + return BinaryOperator::CreateMul(Op1, SubOne(C1)); + + ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) + if (X == dyn_castFoldableMul(Op1, C2)) + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); + } + + // Optimize pointer differences into the same array into a size. Consider: + // &A[10] - &A[0]: we should compile this to "10". 
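+  // In IR the difference appears as "sub (ptrtoint (gep %A, ...)),
+  // (ptrtoint %A)"; the match below replaces the whole subtract with the
+  // byte offset computed by EmitGEPOffset, negated if the GEP was the
+  // subtrahend.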
+ if (TD) { + Value *LHSOp, *RHSOp; + if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + + // trunc(p)-trunc(q) -> trunc(p-q) + if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + } + + return 0; +} + +Instruction *InstCombiner::visitFSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // If this is a 'B = x-(-A)', change to B = x+A... + if (Value *V = dyn_castFNegVal(Op1)) + return BinaryOperator::CreateFAdd(Op0, V); + + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp new file mode 100644 index 0000000..af300fc --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -0,0 +1,1990 @@ +//===- InstCombineAndOrXor.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitAnd, visitOr, and visitXor functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + + +/// AddOne - Add one to a ConstantInt. +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +} +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + +/// isFreeToInvert - Return true if the specified value is free to invert (apply +/// ~ to). This happens in cases where the ~ can be eliminated. +static inline bool isFreeToInvert(Value *V) { + // ~(~(X)) -> X. + if (BinaryOperator::isNot(V)) + return true; + + // Constants can be considered to be not'ed values. + if (isa<ConstantInt>(V)) + return true; + + // Compares can be inverted if they have a single use. + if (CmpInst *CI = dyn_cast<CmpInst>(V)) + return CI->hasOneUse(); + + return false; +} + +static inline Value *dyn_castNotVal(Value *V) { + // If this is not(not(x)) don't return that this is a not: we want the two + // not's to be folded first. + if (BinaryOperator::isNot(V)) { + Value *Operand = BinaryOperator::getNotArgument(V); + if (!isFreeToInvert(Operand)) + return Operand; + } + + // Constants can be considered to be not'ed values... + if (ConstantInt *C = dyn_cast<ConstantInt>(V)) + return ConstantInt::get(C->getType(), ~C->getValue()); + return 0; +} + + +/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits +/// are carefully arranged to allow folding of expressions such as: +/// +/// (A < B) | (A > B) --> (A != B) +/// +/// Note that this is only valid if the first and second predicates have the +/// same sign. 
It is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+///   0  A > B
+///   1  A == B
+///   2  A < B
+///
+/// <=>  Value  Definition
+/// 000     0   Always false
+/// 001     1   A >  B
+/// 010     2   A == B
+/// 011     3   A >= B
+/// 100     4   A <  B
+/// 101     5   A != B
+/// 110     6   A <= B
+/// 111     7   Always true
+///
+static unsigned getICmpCode(const ICmpInst *ICI) {
+  switch (ICI->getPredicate()) {
+    // False -> 0
+  case ICmpInst::ICMP_UGT: return 1;  // 001
+  case ICmpInst::ICMP_SGT: return 1;  // 001
+  case ICmpInst::ICMP_EQ:  return 2;  // 010
+  case ICmpInst::ICMP_UGE: return 3;  // 011
+  case ICmpInst::ICMP_SGE: return 3;  // 011
+  case ICmpInst::ICMP_ULT: return 4;  // 100
+  case ICmpInst::ICMP_SLT: return 4;  // 100
+  case ICmpInst::ICMP_NE:  return 5;  // 101
+  case ICmpInst::ICMP_ULE: return 6;  // 110
+  case ICmpInst::ICMP_SLE: return 6;  // 110
+    // True -> 7
+  default:
+    llvm_unreachable("Invalid ICmp predicate!");
+    return 0;
+  }
+}
+
+/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
+/// predicate into a three bit mask. It also returns whether it is an ordered
+/// predicate by reference.
+static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
+  isOrdered = false;
+  switch (CC) {
+  case FCmpInst::FCMP_ORD: isOrdered = true; return 0;  // 000
+  case FCmpInst::FCMP_UNO:                   return 0;  // 000
+  case FCmpInst::FCMP_OGT: isOrdered = true; return 1;  // 001
+  case FCmpInst::FCMP_UGT:                   return 1;  // 001
+  case FCmpInst::FCMP_OEQ: isOrdered = true; return 2;  // 010
+  case FCmpInst::FCMP_UEQ:                   return 2;  // 010
+  case FCmpInst::FCMP_OGE: isOrdered = true; return 3;  // 011
+  case FCmpInst::FCMP_UGE:                   return 3;  // 011
+  case FCmpInst::FCMP_OLT: isOrdered = true; return 4;  // 100
+  case FCmpInst::FCMP_ULT:                   return 4;  // 100
+  case FCmpInst::FCMP_ONE: isOrdered = true; return 5;  // 101
+  case FCmpInst::FCMP_UNE:                   return 5;  // 101
+  case FCmpInst::FCMP_OLE: isOrdered = true; return 6;  // 110
+  case FCmpInst::FCMP_ULE:                   return 6;  // 110
+    // True -> 7
+  default:
+    // Not expecting FCMP_FALSE and FCMP_TRUE.
+    llvm_unreachable("Unexpected FCmp predicate!");
+    return 0;
+  }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or a brand
+/// new ICmp instruction. The sign is passed in to determine which kind
+/// of predicate to use in the new icmp instruction.
+static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) {
+  switch (Code) {
+  default: assert(0 && "Illegal ICmp code!");
+  case 0:
+    return ConstantInt::getFalse(LHS->getContext());
+  case 1:
+    if (Sign)
+      return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
+    return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
+  case 2:
+    return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
+  case 3:
+    if (Sign)
+      return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
+    return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
+  case 4:
+    if (Sign)
+      return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
+    return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
+  case 5:
+    return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
+  case 6:
+    if (Sign)
+      return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
+    return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
+  case 7:
+    return ConstantInt::getTrue(LHS->getContext());
+  }
+}
+
+/// getFCmpValue - This is the complement of getFCmpCode, which turns an
+/// opcode and two operands into either a constant true or a new FCmp instruction. 
isordered is passed +/// in to determine which kind of predicate to use in the new fcmp instruction. +static Value *getFCmpValue(bool isordered, unsigned code, + Value *LHS, Value *RHS) { + switch (code) { + default: llvm_unreachable("Illegal FCmp code!"); + case 0: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); + case 1: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); + case 2: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); + case 3: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); + case 4: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); + case 5: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); + case 6: + if (isordered) + return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); + else + return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); + case 7: return ConstantInt::getTrue(LHS->getContext()); + } +} + +/// PredicatesFoldable - Return true if both predicates match sign or if at +/// least one of them is an equality comparison (which is signless). +static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { + return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || + (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || + (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); +} + +// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where +// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is +// guaranteed to be a binary operator. +Instruction *InstCombiner::OptAndOp(Instruction *Op, + ConstantInt *OpRHS, + ConstantInt *AndRHS, + BinaryOperator &TheAnd) { + Value *X = Op->getOperand(0); + Constant *Together = 0; + if (!Op->isShift()) + Together = ConstantExpr::getAnd(AndRHS, OpRHS); + + switch (Op->getOpcode()) { + case Instruction::Xor: + if (Op->hasOneUse()) { + // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) + Value *And = Builder->CreateAnd(X, AndRHS); + And->takeName(Op); + return BinaryOperator::CreateXor(And, Together); + } + break; + case Instruction::Or: + if (Together == AndRHS) // (X | C) & C --> C + return ReplaceInstUsesWith(TheAnd, AndRHS); + + if (Op->hasOneUse() && Together != OpRHS) { + // (X | C1) & C2 --> (X | (C1&C2)) & C2 + Value *Or = Builder->CreateOr(X, Together); + Or->takeName(Op); + return BinaryOperator::CreateAnd(Or, AndRHS); + } + break; + case Instruction::Add: + if (Op->hasOneUse()) { + // Adding a one to a single bit bit-field should be turned into an XOR + // of the bit. First thing to check is to see if this AND is with a + // single bit constant. + const APInt &AndRHSV = cast<ConstantInt>(AndRHS)->getValue(); + + // If there is only one bit set. + if (AndRHSV.isPowerOf2()) { + // Ok, at this point, we know that we are masking the result of the + // ADD down to exactly one bit. If the constant we are adding has + // no bits set below this bit, then we can eliminate the ADD. + const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue(); + + // Check to see if any bits below the one bit set in AndRHSV are set. 
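+        // E.g., for ((X + 8) & 4) the bits below 4 are clear in 8, and
+        // 8 & 4 == 0, so the add is dropped: (X & 4).  For ((X + 4) & 4)
+        // the added constant is the masked bit itself, giving (X & 4) ^ 4.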
+        if ((AddRHS & (AndRHSV-1)) == 0) {
+          // If not, the only thing that can affect the output of the AND is
+          // the bit specified by AndRHSV.  If that bit is set, the effect of
+          // the XOR is to toggle the bit.  If it is clear, then the ADD has
+          // no effect.
+          if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+            TheAnd.setOperand(0, X);
+            return &TheAnd;
+          } else {
+            // Pull the XOR out of the AND.
+            Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+            NewAnd->takeName(Op);
+            return BinaryOperator::CreateXor(NewAnd, AndRHS);
+          }
+        }
+      }
+    }
+    break;
+
+  case Instruction::Shl: {
+    // We know that the AND will not produce any of the bits shifted in, so if
+    // the anded constant includes them, clear them now!
+    //
+    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+    APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
+    ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
+                                       AndRHS->getValue() & ShlMask);
+
+    if (CI->getValue() == ShlMask) {
+      // Masking out bits that the shift already masks
+      return ReplaceInstUsesWith(TheAnd, Op);   // No need for the and.
+    } else if (CI != AndRHS) {                  // Reducing bits set in and.
+      TheAnd.setOperand(1, CI);
+      return &TheAnd;
+    }
+    break;
+  }
+  case Instruction::LShr: {
+    // We know that the AND will not produce any of the bits shifted in, so if
+    // the anded constant includes them, clear them now!  This only applies to
+    // unsigned shifts, because a signed shr may bring in set bits!
+    //
+    uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+    uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+    APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+    ConstantInt *CI = ConstantInt::get(Op->getContext(),
+                                       AndRHS->getValue() & ShrMask);
+
+    if (CI->getValue() == ShrMask) {
+      // Masking out bits that the shift already masks.
+      return ReplaceInstUsesWith(TheAnd, Op);
+    } else if (CI != AndRHS) {
+      TheAnd.setOperand(1, CI);  // Reduce bits set in and cst.
+      return &TheAnd;
+    }
+    break;
+  }
+  case Instruction::AShr:
+    // Signed shr.
+    // See if this is shifting in some sign extension, then masking it out
+    // with an and.
+    if (Op->hasOneUse()) {
+      uint32_t BitWidth = AndRHS->getType()->getBitWidth();
+      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
+      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
+      Constant *C = ConstantInt::get(Op->getContext(),
+                                     AndRHS->getValue() & ShrMask);
+      if (C == AndRHS) {          // Masking out bits shifted in.
+        // (Val ashr C1) & C2 -> (Val lshr C1) & C2
+        // Make the argument unsigned.
+        Value *ShVal = Op->getOperand(0);
+        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
+      }
+    }
+    break;
+  }
+  return 0;
+}
+
+
+/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
+/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more
+/// efficient (V-Lo) <u Hi-Lo.  This method expects that Lo <= Hi.  isSigned
+/// indicates whether to treat V, Lo, and Hi as signed or not.  IB is the
+/// location to insert new instructions.
+Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+                                           bool isSigned, bool Inside,
+                                           Instruction &IB) {
+  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+            ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+         "Lo is not <= Hi in range emission code!");
+
+  if (Inside) {
+    if (Lo == Hi)  // Trivially false.
+ return new ICmpInst(ICmpInst::ICMP_NE, V, V); + + // V >= Min && V < Hi --> V < Hi + if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { + ICmpInst::Predicate pred = (isSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); + return new ICmpInst(pred, V, Hi); + } + + // Emit V-Lo <u Hi-Lo + Constant *NegLo = ConstantExpr::getNeg(Lo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); + return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); + } + + if (Lo == Hi) // Trivially true. + return new ICmpInst(ICmpInst::ICMP_EQ, V, V); + + // V < Min || V >= Hi -> V > Hi-1 + Hi = SubOne(cast<ConstantInt>(Hi)); + if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { + ICmpInst::Predicate pred = (isSigned ? + ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); + return new ICmpInst(pred, V, Hi); + } + + // Emit V-Lo >u Hi-1-Lo + // Note that Hi has already had one subtracted from it, above. + ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); + return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); +} + +// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with +// any number of 0s on either side. The 1s are allowed to wrap from LSB to +// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is +// not, since all 1s are not contiguous. +static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { + const APInt& V = Val->getValue(); + uint32_t BitWidth = Val->getType()->getBitWidth(); + if (!APIntOps::isShiftedMask(BitWidth, V)) return false; + + // look for the first zero bit after the run of ones + MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); + // look for the first non-zero bit + ME = V.getActiveBits(); + return true; +} + +/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, +/// where isSub determines whether the operator is a sub. If we can fold one of +/// the following xforms: +/// +/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask +/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 +/// +/// return (A +/- B). +/// +Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, + ConstantInt *Mask, bool isSub, + Instruction &I) { + Instruction *LHSI = dyn_cast<Instruction>(LHS); + if (!LHSI || LHSI->getNumOperands() != 2 || + !isa<ConstantInt>(LHSI->getOperand(1))) return 0; + + ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); + + switch (LHSI->getOpcode()) { + default: return 0; + case Instruction::And: + if (ConstantExpr::getAnd(N, Mask) == Mask) { + // If the AndRHS is a power of two minus one (0+1+), this is simple. + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == + Mask->getValue().getBitWidth()) + break; + + // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ + // part, we don't need any explicit masks to take them out of A. If that + // is all N is, ignore it. 
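+      // E.g., ((A & 0x00F0) + B) & 0x00F0 -> (A + B) & 0x00F0 when the low
+      // four bits of B are known zero: no carry can enter the masked field
+      // from below, so dropping the inner mask cannot change the result.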
+ uint32_t MB = 0, ME = 0; + if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive + uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth(); + APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); + if (MaskedValueIsZero(RHS, Mask)) + break; + } + } + return 0; + case Instruction::Or: + case Instruction::Xor: + // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() + && ConstantExpr::getAnd(N, Mask)->isNullValue()) + break; + return 0; + } + + if (isSub) + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); +} + +/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. +Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, + ICmpInst *LHS, ICmpInst *RHS) { + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + + // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) + if (PredicatesFoldable(LHSCC, RHSCC)) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + Value *RV = getICmpValue(isSigned, Code, Op0, Op1); + if (Instruction *I = dyn_cast<Instruction>(RV)) + return I; + // Otherwise, it's a constant boolean value. + return ReplaceInstUsesWith(I, RV); + } + } + + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). + Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); + ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); + ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); + if (LHSCst == 0 || RHSCst == 0) return 0; + + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) + // where C is a power of 2 + if (LHSCC == ICmpInst::ICMP_ULT && + LHSCst->getValue().isPowerOf2()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + + // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + } + + // From here on, we only handle: + // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. + if (Val != Val2) return 0; + + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. + if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || + RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || + LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || + RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) + return 0; + + // We can't fold (ugt x, C) & (sgt x, C2). + if (!PredicatesFoldable(LHSCC, RHSCC)) + return 0; + + // Ensure that the larger constant is on the RHS. 
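+  // E.g., (X u< 20) & (X u> 10) is canonicalized so that the compare
+  // against the larger constant (20) ends up on the RHS; the switch below
+  // then only has to handle "small constant op" / "large constant op" pairs.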
+ bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have have two icmp instructions + // comparing a value against two constants and and'ing the result + // together. Because of the above check, we know that we only have + // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know + // (from the icmp folding check above), that the two constants + // are not equal and that the larger constant is on the RHS + assert(LHSCst != RHSCst && "Compares not folded above?"); + + switch (LHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false + case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 + case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 + case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 + return ReplaceInstUsesWith(I, LHS); + } + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_ULT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); + break; // (X != 13 & X u< 15) -> no change + case ICmpInst::ICMP_SLT: + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 + return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); + break; // (X != 13 & X s< 15) -> no change + case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 + case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_NE: + if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + return new ICmpInst(ICmpInst::ICMP_UGT, Add, + ConstantInt::get(Add->getType(), 1)); + } + break; // (X != 13 & X != 15) -> no change + } + break; + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false + case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 + case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false + case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change + break; + case 
ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 + case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_UGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: + if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 + return new ICmpInst(LHSCC, Val, RHSCst); + break; // (X u> 13 & X != 15) -> no change + case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 + return InsertRangeTest(Val, AddOne(LHSCst), + RHSCst, false, true, I); + case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SGT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 + case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change + break; + case ICmpInst::ICMP_NE: + if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 + return new ICmpInst(LHSCC, Val, RHSCst); + break; // (X s> 13 & X != 15) -> no change + case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 + return InsertRangeTest(Val, AddOne(LHSCst), + RHSCst, true, true, I); + case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change + break; + } + break; + } + + return 0; +} + +Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + + if (LHS->getPredicate() == FCmpInst::FCMP_ORD && + RHS->getPredicate() == FCmpInst::FCMP_ORD) { + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) + if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // false. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp ord x,x" is "fcmp ord x, 0". + if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && + isa<ConstantAggregateZero>(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
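+    // E.g., "fcmp uno x, y" & "fcmp ult x, y" -> "fcmp ult x, y", since
+    // ult already includes the unordered case; the predicate merging below
+    // generalizes this.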
+ if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + + if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + if (Op0CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, LHS); + + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op1Pred == 0) { + std::swap(LHS, RHS); + std::swap(Op0Pred, Op1Pred); + std::swap(Op0Ordered, Op1Ordered); + } + if (Op0Pred == 0) { + // uno && ueq -> uno && (uno || eq) -> ueq + // ord && olt -> ord && (ord && lt) -> olt + if (Op0Ordered == Op1Ordered) + return ReplaceInstUsesWith(I, RHS); + + // uno && oeq -> uno && (ord && eq) -> false + // uno && ord -> false + if (!Op0Ordered) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + // ord && ueq -> ord && (uno || eq) -> oeq + return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); + } + } + + return 0; +} + + +Instruction *InstCombiner::visitAnd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyAndInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { + const APInt &AndRHSMask = AndRHS->getValue(); + APInt NotAndRHS(~AndRHSMask); + + // Optimize a variety of ((val OP C1) & C2) combinations... + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { + Value *Op0LHS = Op0I->getOperand(0); + Value *Op0RHS = Op0I->getOperand(1); + switch (Op0I->getOpcode()) { + default: break; + case Instruction::Xor: + case Instruction::Or: + // If the mask is only needed on one incoming arm, push it up. + if (!Op0I->hasOneUse()) break; + + if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { + // Not masking anything out for the LHS, move to RHS. + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); + } + if (!isa<Constant>(Op0RHS) && + MaskedValueIsZero(Op0RHS, NotAndRHS)) { + // Not masking anything out for the RHS, move to LHS. + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); + } + + break; + case Instruction::Add: + // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. + // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) + return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes + break; + + case Instruction::Sub: + // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 
+ // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 + if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) + return BinaryOperator::CreateAnd(V, AndRHS); + + // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS + // has 1's for all bits that the subtraction with A might affect. + if (Op0I->hasOneUse()) { + uint32_t BitWidth = AndRHSMask.getBitWidth(); + uint32_t Zeros = AndRHSMask.countLeadingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); + + ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); + if (!(A && A->isZero()) && // avoid infinite recursion. + MaskedValueIsZero(Op0LHS, Mask)) { + Value *NewNeg = Builder->CreateNeg(Op0RHS); + return BinaryOperator::CreateAnd(NewNeg, AndRHS); + } + } + break; + + case Instruction::Shl: + case Instruction::LShr: + // (1 << x) & 1 --> zext(x == 0) + // (1 >> x) & 1 --> zext(x == 0) + if (AndRHSMask == 1 && Op0LHS == AndRHS) { + Value *NewICmp = + Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); + return new ZExtInst(NewICmp, I.getType()); + } + break; + } + + if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) + if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) + return Res; + } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) { + // If this is an integer truncation or change from signed-to-unsigned, and + // if the source is an and/or with immediate, transform it. This + // frequently occurs for bitfield accesses. + if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) { + if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) && + CastOp->getNumOperands() == 2) + if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){ + if (CastOp->getOpcode() == Instruction::And) { + // Change: and (cast (and X, C1) to T), C2 + // into : and (cast X to T), trunc_or_bitcast(C1)&C2 + // This will fold the two constants together, which may allow + // other simplifications. + Value *NewCast = Builder->CreateTruncOrBitCast( + CastOp->getOperand(0), I.getType(), + CastOp->getName()+".shrunk"); + // trunc_or_bitcast(C1)&C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); + return BinaryOperator::CreateAnd(NewCast, C3); + } else if (CastOp->getOpcode() == Instruction::Or) { + // Change: and (cast (or X, C1) to T), C2 + // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) + // trunc(C1)&C2 + return ReplaceInstUsesWith(I, AndRHS); + } + } + } + } + + // Try to fold constant and into select arguments. 
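+    // E.g., (select C, 7, 12) & 3 can become (select C, 3, 0): the mask is
+    // applied to each constant arm and the select survives.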
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + + // (~A & ~B) == (~(A | B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(Or); + } + + { + Value *A = 0, *B = 0, *C = 0, *D = 0; + // (A|B) & ~(A&B) -> A^B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); + + // ~(A&B) & (A|B) -> A^B + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); + + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1) { // (A^B)&A -> A&(A^B) + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } else if (B == Op1) { // (A^B)&B -> B&(B^A) + cast<BinaryOperator>(Op0)->swapOperands(); + I.swapOperands(); // Simplify below + std::swap(Op0, Op1); + } + } + + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (B == Op0) { // B&(A^B) -> B&(B^A) + cast<BinaryOperator>(Op1)->swapOperands(); + std::swap(A, B); + } + if (A == Op0) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); + } + + // (A&((~A)|B)) -> A&B + if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || + match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + return BinaryOperator::CreateAnd(A, Op1); + if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || + match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + return BinaryOperator::CreateAnd(A, Op0); + } + + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) + if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) + return Res; + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) + if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) + if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) + if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && + // Only do this if the casts both really cause code to be generated. + ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + + // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. + if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { + if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + SI0->getOperand(1) == SI1->getOperand(1) && + (SI0->hasOneUse() || SI1->hasOneUse())) { + Value *NewOp = + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); + return BinaryOperator::Create(SI1->getOpcode(), NewOp, + SI1->getOperand(1)); + } + } + + // If and'ing two fcmp, try combine them into one. 
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
+      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
+        return Res;
+  }
+
+  return Changed ? &I : 0;
+}
+
+/// CollectBSwapParts - Analyze the specified subexpression and see if it is
+/// capable of providing pieces of a bswap.  The subexpression provides pieces
+/// of a bswap if it is proven that each of the non-zero bytes in the output of
+/// the expression came from the corresponding "byte swapped" byte in some other
+/// value.  For example, if the current subexpression is "(shl i32 %X, 24)" then
+/// we know that the expression deposits the low byte of %X into the high byte
+/// of the bswap result and that all other bytes are zero.  If this expression
+/// is accepted, the high byte of ByteValues is set to X to indicate a correct
+/// match.
+///
+/// This function returns true if the match was unsuccessful and false if it
+/// succeeded.  On entry to the function the "OverallLeftShift" is a signed
+/// integer value indicating the number of bytes that the subexpression is
+/// later shifted.  For example, if the expression is later right shifted by
+/// 16 bits, the OverallLeftShift value would be -2 on entry.  This is used to
+/// specify which byte of ByteValues is actually being set.
+///
+/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
+/// byte is masked to zero by a user.  For example, in (X & 255), X will be
+/// processed with a bytemask of 1.  Because bytemask is 32-bits, this limits
+/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
+/// always in the local (OverallLeftShift) coordinate space.
+///
+static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
+                              SmallVector<Value*, 8> &ByteValues) {
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If this is an or instruction, it may be an inner node of the bswap.
+    if (I->getOpcode() == Instruction::Or) {
+      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+                               ByteValues) ||
+             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
+                               ByteValues);
+    }
+
+    // If this is a logical shift by a constant multiple of 8, recurse with
+    // OverallLeftShift and ByteMask adjusted.
+    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+      unsigned ShAmt =
+        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+      // Ensure the shift amount is defined and a whole number of bytes.
+      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+        return true;
+
+      unsigned ByteShift = ShAmt >> 3;
+      if (I->getOpcode() == Instruction::Shl) {
+        // X << 2 -> collect(X, +2)
+        OverallLeftShift += ByteShift;
+        ByteMask >>= ByteShift;
+      } else {
+        // X >>u 2 -> collect(X, -2)
+        OverallLeftShift -= ByteShift;
+        ByteMask <<= ByteShift;
+        ByteMask &= (~0U >> (32-ByteValues.size()));
+      }
+
+      if (OverallLeftShift >= (int)ByteValues.size()) return true;
+      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+
+      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+                               ByteValues);
+    }
+
+    // If this is a logical 'and' with a mask that clears bytes, clear the
+    // corresponding bytes in ByteMask.
+    if (I->getOpcode() == Instruction::And &&
+        isa<ConstantInt>(I->getOperand(1))) {
+      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
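+      // E.g., an 'and' with 0x00FF00FF keeps bytes 0 and 2 and zaps bytes
+      // 1 and 3, so bits 1 and 3 of ByteMask get cleared; a mask byte such
+      // as 0x0F is neither 0 nor 255 and rejects the bswap match.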
+ unsigned NumBytes = ByteValues.size(); + APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); + const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + + for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { + // If this byte is masked out by a later operation, we don't care what + // the and mask is. + if ((ByteMask & (1 << i)) == 0) + continue; + + // If the AndMask is all zeros for this byte, clear the bit. + APInt MaskB = AndMask & Byte; + if (MaskB == 0) { + ByteMask &= ~(1U << i); + continue; + } + + // If the AndMask is not all ones for this byte, it's not a bytezap. + if (MaskB != Byte) + return true; + + // Otherwise, this byte is kept. + } + + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + ByteValues); + } + } + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap. Some observations: 1) if more than one byte + // is demanded from this input, then it could not be successfully assembled + // into a byteswap. At least one of the two bytes would not be aligned with + // their ultimate destination. + if (!isPowerOf2_32(ByteMask)) return true; + unsigned InputByteNo = CountTrailingZeros_32(ByteMask); + + // 2) The input and ultimate destinations must line up: if byte 3 of an i32 + // is demanded, it needs to go into byte 0 of the result. This means that the + // byte needs to be shifted until it lands in the right byte bucket. The + // shift amount depends on the position: if the byte is coming from the high + // part of the value (e.g. byte 3) then it must be shifted right. If from the + // low part, it must be shifted left. + unsigned DestByteNo = InputByteNo + OverallLeftShift; + if (InputByteNo < ByteValues.size()/2) { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } else { + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; + } + + // If the destination byte value is already defined, the values are or'd + // together, which isn't a bswap (unless it's an or of the same bits). + if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) + return true; + ByteValues[DestByteNo] = V; + return false; +} + +/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. +/// If so, insert the new bswap intrinsic and return it. +Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { + const IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); + if (!ITy || ITy->getBitWidth() % 16 || + // ByteMask only allows up to 32-byte values. + ITy->getBitWidth() > 32*8) + return 0; // Can only bswap pairs of bytes. Can't do vectors. + + /// ByteValues - For each byte of the result, we keep track of which value + /// defines each byte. + SmallVector<Value*, 8> ByteValues; + ByteValues.resize(ITy->getBitWidth()/8); + + // Try to find all the pieces corresponding to the bswap. + uint32_t ByteMask = ~0U >> (32-ByteValues.size()); + if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) + return 0; + + // Check to see if all of the bytes come from the same value. + Value *V = ByteValues[0]; + if (V == 0) return 0; // Didn't find a byte? Must be zero. + + // Check to make sure that all of the bytes come from the same value. 
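+  // E.g., for an i32 bswap this requires ByteValues == {%x,%x,%x,%x}; the
+  // call created below is then "call i32 @llvm.bswap.i32(i32 %x)".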
+  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
+    if (ByteValues[i] != V)
+      return 0;
+  const Type *Tys[] = { ITy };
+  Module *M = I.getParent()->getParent()->getParent();
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  return CallInst::Create(F, V);
+}
+
+/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D).  Check
+/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1); if so,
+/// we can simplify this expression to "cond ? C : D" or "cond ? C : B".
+static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
+                                         Value *C, Value *D) {
+  // If A is not a select of -1/0, this cannot match.
+  Value *Cond = 0;
+  if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
+    return 0;
+
+  // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
+  if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
+    return SelectInst::Create(Cond, C, B);
+  if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
+    return SelectInst::Create(Cond, C, B);
+  // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
+  if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
+    return SelectInst::Create(Cond, C, D);
+  if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
+    return SelectInst::Create(Cond, C, D);
+  return 0;
+}
+
+/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
+                                         ICmpInst *LHS, ICmpInst *RHS) {
+  ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+
+  // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+  if (PredicatesFoldable(LHSCC, RHSCC)) {
+    if (LHS->getOperand(0) == RHS->getOperand(1) &&
+        LHS->getOperand(1) == RHS->getOperand(0))
+      LHS->swapOperands();
+    if (LHS->getOperand(0) == RHS->getOperand(0) &&
+        LHS->getOperand(1) == RHS->getOperand(1)) {
+      Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+      unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+      bool isSigned = LHS->isSigned() || RHS->isSigned();
+      Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
+      if (Instruction *I = dyn_cast<Instruction>(RV))
+        return I;
+      // Otherwise, it's a constant boolean value.
+      return ReplaceInstUsesWith(I, RV);
+    }
+  }
+
+  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
+  Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+  ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+  ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+  if (LHSCst == 0 || RHSCst == 0) return 0;
+
+  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+  if (LHSCst == RHSCst && LHSCC == RHSCC &&
+      LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+    Value *NewOr = Builder->CreateOr(Val, Val2);
+    return new ICmpInst(LHSCC, NewOr, LHSCst);
+  }
+
+  // From here on, we only handle:
+  //    (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
+  if (Val != Val2) return 0;
+
+  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
+  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
+      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
+      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
+      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+    return 0;
+
+  // We can't fold (ugt x, C) | (sgt x, C2).
+  if (!PredicatesFoldable(LHSCC, RHSCC))
+    return 0;
+
+  // Ensure that the larger constant is on the RHS.
+ bool ShouldSwap; + if (CmpInst::isSigned(LHSCC) || + (ICmpInst::isEquality(LHSCC) && + CmpInst::isSigned(RHSCC))) + ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); + else + ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); + + if (ShouldSwap) { + std::swap(LHS, RHS); + std::swap(LHSCst, RHSCst); + std::swap(LHSCC, RHSCC); + } + + // At this point, we know we have have two icmp instructions + // comparing a value against two constants and or'ing the result + // together. Because of the above check, we know that we only have + // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the + // icmp folding check above), that the two constants are not + // equal. + assert(LHSCst != RHSCst && "Compares not folded above?"); + + switch (LHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: + if (LHSCst == SubOne(RHSCst)) { + // (X == 13 | X == 14) -> X-13 <u 2 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); + } + break; // (X == 13 | X == 15) -> no change + case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change + case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change + break; + case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 + case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 + return ReplaceInstUsesWith(I, RHS); + } + break; + case ICmpInst::ICMP_NE: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 + case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 + case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 + return ReplaceInstUsesWith(I, LHS); + case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true + case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true + case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + } + break; + case ICmpInst::ICMP_ULT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change + break; + case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. + if (RHSCst->isMaxValue(false)) + return ReplaceInstUsesWith(I, LHS); + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), + false, false, I); + case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change + break; + case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 + case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 + return ReplaceInstUsesWith(I, RHS); + case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change + break; + } + break; + case ICmpInst::ICMP_SLT: + switch (RHSCC) { + default: llvm_unreachable("Unknown integer condition code!"); + case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change + break; + case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 + // If RHSCst is [us]MAXINT, it is always false. Not handling + // this can cause overflow. 
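+      // E.g., (X s< 13 | X s> SMAX): the right-hand compare is always
+      // false, so the result is just "X s< 13"; AddOne(RHSCst) would wrap
+      // here, hence the early return below.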
+ if (RHSCst->isMaxValue(true))
+ return ReplaceInstUsesWith(I, LHS);
+ return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),
+ true, false, I);
+ case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return ReplaceInstUsesWith(I, RHS);
+ case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
+ break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (RHSCC) {
+ default: llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X s> 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X s> 13
+ return ReplaceInstUsesWith(I, LHS);
+ case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
+ break;
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS,
+ FCmpInst *RHS) {
+ if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
+ if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+ if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+ // If either of the constants is a NaN, then the whole thing returns
+ // true.
+ if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+
+ // Otherwise, no need to compare the two constants, compare the
+ // rest.
+ return new FCmpInst(FCmpInst::FCMP_UNO,
+ LHS->getOperand(0), RHS->getOperand(0));
+ }
+
+ // Handle vector zeros. This occurs because the canonical form of
+ // "fcmp uno x,x" is "fcmp uno x, 0".
+ if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
+ isa<ConstantAggregateZero>(RHS->getOperand(1)))
+ return new FCmpInst(FCmpInst::FCMP_UNO,
+ LHS->getOperand(0), RHS->getOperand(0));
+
+ return 0;
+ }
+
+ Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
+ Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
+ FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
+
+ if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
+ // Swap RHS operands to match LHS.
+ Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
+ std::swap(Op1LHS, Op1RHS);
+ }
+ if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
+ // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y).
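+ // For example, (fcmp olt x, y) | (fcmp ogt x, y) or's the condition
+ // bits of two ordered predicates, yielding a single "fcmp one x, y".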
+ if (Op0CC == Op1CC)
+ return new FCmpInst((FCmpInst::Predicate)Op0CC,
+ Op0LHS, Op0RHS);
+ if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ if (Op0CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, RHS);
+ if (Op1CC == FCmpInst::FCMP_FALSE)
+ return ReplaceInstUsesWith(I, LHS);
+ bool Op0Ordered;
+ bool Op1Ordered;
+ unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
+ unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered);
+ if (Op0Ordered == Op1Ordered) {
+ // If both are ordered or unordered, return a new fcmp with
+ // or'ed predicates.
+ Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value...
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+ return 0;
+}
+
+/// FoldOrWithConstants - This helper function folds:
+///
+/// ((A | B) & C1) | (B & C2)
+///
+/// into:
+///
+/// (A & C1) | B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1) return 0;
+
+ Value *V1 = 0;
+ ConstantInt *CI2 = 0;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue()) return 0;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
+ return BinaryOperator::CreateOr(NewOp, V1);
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitOr(BinaryOperator &I) {
+ bool Changed = SimplifyCommutative(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ ConstantInt *C1 = 0; Value *X = 0;
+ // (X & C1) | C2 --> (X | C2) & (C1|C2)
+ if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateAnd(Or,
+ ConstantInt::get(I.getContext(),
+ RHS->getValue() | C1->getValue()));
+ }
+
+ // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
+ Op0->hasOneUse()) {
+ Value *Or = Builder->CreateOr(X, RHS);
+ Or->takeName(Op0);
+ return BinaryOperator::CreateXor(Or,
+ ConstantInt::get(I.getContext(),
+ C1->getValue() & ~RHS->getValue()));
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ Value *A = 0, *B = 0;
+ ConstantInt *C1 = 0, *C2 = 0;
+
+ // (A | B) | C and A | (B | C) -> bswap if possible.
+ // (A >> B) | (C << D) and (A << B) | (C >> D) -> bswap if possible.
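+ // For example, the common i16 byte-swap idiom
+ //   %hi = shl i16 %x, 8
+ //   %lo = lshr i16 %x, 8
+ //   %r  = or i16 %hi, %lo
+ // matches the shift/shift form and becomes
+ //   %r = call i16 @llvm.bswap.i16(i16 %x)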
+ if (match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())) || + (match(Op0, m_Shift(m_Value(), m_Value())) && + match(Op1, m_Shift(m_Value(), m_Value())))) { + if (Instruction *BSwap = MatchBSwap(I)) + return BSwap; + } + + // (X^C)|Y -> (X|Y)^C iff Y&C == 0 + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op1, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op1); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // Y|(X^C) -> (X|Y)^C iff Y&C == 0 + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && + MaskedValueIsZero(Op0, C1->getValue())) { + Value *NOr = Builder->CreateOr(A, Op0); + NOr->takeName(Op0); + return BinaryOperator::CreateXor(NOr, C1); + } + + // (A & C)|(B & D) + Value *C = 0, *D = 0; + if (match(Op0, m_And(m_Value(A), m_Value(C))) && + match(Op1, m_And(m_Value(B), m_Value(D)))) { + Value *V1 = 0, *V2 = 0, *V3 = 0; + C1 = dyn_cast<ConstantInt>(C); + C2 = dyn_cast<ConstantInt>(D); + if (C1 && C2) { // (A & C1)|(B & C2) + // If we have: ((V + N) & C1) | (V & C2) + // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 + // replace with V+N. + if (C1->getValue() == ~C2->getValue()) { + if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ + match(A, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + return ReplaceInstUsesWith(I, A); + if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + return ReplaceInstUsesWith(I, A); + } + // Or commutes, try both ways. + if ((C1->getValue() & (C1->getValue()+1)) == 0 && + match(B, m_Add(m_Value(V1), m_Value(V2)))) { + // Add commutes, try both ways. + if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + return ReplaceInstUsesWith(I, B); + if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + return ReplaceInstUsesWith(I, B); + } + } + + if ((C1->getValue() & C2->getValue()) == 0) { + // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) + // iff (C1&C2) == 0 and (N&~C1) == 0 + if (match(A, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N) + (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(A, + ConstantInt::get(A->getContext(), + C1->getValue()|C2->getValue())); + // Or commutes, try both ways. + if (match(B, m_Or(m_Value(V1), m_Value(V2))) && + ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N) + (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V) + return BinaryOperator::CreateAnd(B, + ConstantInt::get(B->getContext(), + C1->getValue()|C2->getValue())); + + // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) + // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. + ConstantInt *C3 = 0, *C4 = 0; + if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && + (C3->getValue() & ~C1->getValue()) == 0 && + match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && + (C4->getValue() & ~C2->getValue()) == 0) { + V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); + return BinaryOperator::CreateAnd(V2, + ConstantInt::get(B->getContext(), + C1->getValue()|C2->getValue())); + } + } + } + + // Check to see if we have any common things being and'ed. If so, find the + // terms for V1 & (V2|V3). 
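+ // (This is just "and" distributing over "or", run in reverse:
+ // (X & Y) | (X & Z) == X & (Y | Z).)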
+ if (Op0->hasOneUse() || Op1->hasOneUse()) {
+ V1 = 0;
+ if (A == B) // (A & C)|(A & D) == A & (C|D)
+ V1 = A, V2 = C, V3 = D;
+ else if (A == D) // (A & C)|(B & A) == A & (B|C)
+ V1 = A, V2 = B, V3 = C;
+ else if (C == B) // (A & C)|(C & D) == C & (A|D)
+ V1 = C, V2 = A, V3 = D;
+ else if (C == D) // (A & C)|(B & C) == C & (A|B)
+ V1 = C, V2 = A, V3 = B;
+
+ if (V1) {
+ Value *Or = Builder->CreateOr(V2, V3, "tmp");
+ return BinaryOperator::CreateAnd(V1, Or);
+ }
+ }
+
+ // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants
+ if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D))
+ return Match;
+ if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C))
+ return Match;
+
+ // ((A&~D)|(~A&D)) -> A^D
+ if ((match(C, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, D);
+ // ((~D&C)|(~C&D)) -> C^D
+ if ((match(A, m_Not(m_Specific(D))) &&
+ match(B, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, D);
+ // ((A&~B)|(B&~A)) -> A^B
+ if ((match(C, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(A)))))
+ return BinaryOperator::CreateXor(A, B);
+ // ((~B&C)|(B&~C)) -> C^B
+ if ((match(A, m_Not(m_Specific(B))) &&
+ match(D, m_Not(m_Specific(C)))))
+ return BinaryOperator::CreateXor(C, B);
+ }
+
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
+ if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ SI0->getOperand(1) == SI1->getOperand(1) &&
+ (SI0->hasOneUse() || SI1->hasOneUse())) {
+ Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
+ SI0->getName());
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ SI1->getOperand(1));
+ }
+ }
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+ match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
+ match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
+ if (Ret) return Ret;
+ }
+
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
+
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS))
+ return Res;
+
+ // fold (or (cast A), (cast B)) -> (cast (or A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
+ !isa<ICmpInst>(Op1C->getOperand(0))) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVector() &&
+ // Only do this if the casts both really cause code to be
+ // generated.
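+ // (e.g. or'ing two i8 values and widening the result once is
+ // cheaper than first widening each operand)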
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { + Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); + return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); + } + } + } + } + + + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitXor(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (isa<UndefValue>(Op1)) { + if (isa<UndefValue>(Op0)) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef + } + + // xor X, X = 0 + if (Op0 == Op1) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa<VectorType>(I.getType())) + if (isa<ConstantAggregateZero>(Op1)) + return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X + + // Is this a ~ operation? + if (Value *NotOp = dyn_castNotVal(&I)) { + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { + if (Op0I->getOpcode() == Instruction::And || + Op0I->getOpcode() == Instruction::Or) { + // ~(~X & Y) --> (X | ~Y) - De Morgan's Law + // ~(~X | Y) === (X & ~Y) - De Morgan's Law + if (dyn_castNotVal(Op0I->getOperand(1))) + Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); + } + + // ~(X & Y) --> (~X | ~Y) - De Morgan's Law + // ~(X | Y) === (~X & ~Y) - De Morgan's Law + if (isFreeToInvert(Op0I->getOperand(0)) && + isFreeToInvert(Op0I->getOperand(1))) { + Value *NotX = + Builder->CreateNot(Op0I->getOperand(0), "notlhs"); + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), "notrhs"); + if (Op0I->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(NotX, NotY); + return BinaryOperator::CreateAnd(NotX, NotY); + } + } + } + } + + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + if (RHS->isOne() && Op0->hasOneUse()) { + // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) + return new ICmpInst(ICI->getInversePredicate(), + ICI->getOperand(0), ICI->getOperand(1)); + + if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) + return new FCmpInst(FCI->getInversePredicate(), + FCI->getOperand(0), FCI->getOperand(1)); + } + + // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 
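+ // (zext i1 true is 1 and sext i1 true is -1, so xor'ing the extended
+ // compare with that constant is the same as extending the inverted
+ // compare.)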
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ (RHS == ConstantExpr::getCast(Opcode,
+ ConstantInt::getTrue(I.getContext()),
+ Op0C->getDestTy()))) {
+ CI->setPredicate(CI->getInversePredicate());
+ return CastInst::Create(Opcode, CI, Op0C->getType());
+ }
+ }
+ }
+ }
+
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
+ // ~(c-X) == X-c-1 == X+(-c-1)
+ if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
+ Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
+ Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
+ ConstantInt::get(I.getType(), 1));
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ }
+
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
+ if (Op0I->getOpcode() == Instruction::Add) {
+ // ~(X+c) --> (-c-1)-X
+ if (RHS->isAllOnesValue()) {
+ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
+ return BinaryOperator::CreateSub(
+ ConstantExpr::getSub(NegOp0CI,
+ ConstantInt::get(I.getType(), 1)),
+ Op0I->getOperand(0));
+ } else if (RHS->getValue().isSignBit()) {
+ // (X + C) ^ signbit -> (X + C + signbit)
+ Constant *C = ConstantInt::get(I.getContext(),
+ RHS->getValue() + Op0CI->getValue());
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
+
+ }
+ } else if (Op0I->getOpcode() == Instruction::Or) {
+ // (X|C1)^C2 -> X^(C1|C2) iff X&C1 == 0
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ // Anything in both C1 and C2 is known to be zero, remove it from
+ // NewRHS.
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ NewRHS = ConstantExpr::getAnd(NewRHS,
+ ConstantExpr::getNot(CommonBits));
+ Worklist.Add(Op0I);
+ I.setOperand(0, Op0I->getOperand(0));
+ I.setOperand(1, NewRHS);
+ return &I;
+ }
+ }
+ }
+ }
+
+ // Try to fold constant and into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
+ if (X == Op1)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+ if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
+ if (X == Op0)
+ return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
+
+
+ BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
+ if (Op1I) {
+ Value *A, *B;
+ if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (A == Op0) { // B^(B|A) == (A|B)^B
+ Op1I->swapOperands();
+ I.swapOperands();
+ std::swap(Op0, Op1);
+ } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // A^(A^B) == B
+ } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
+ return ReplaceInstUsesWith(I, A); // A^(B^A) == B
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ Op1I->hasOneUse()){
+ if (A == Op0) { // A^(A&B) -> A^(B&A)
+ Op1I->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B&A) -> (B&A)^A
+ I.swapOperands(); // Simplified below.
+ std::swap(Op0, Op1);
+ }
+ }
+ }
+
+ BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
+ if (Op0I) {
+ Value *A, *B;
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()) {
+ if (A == Op1) // (B|A)^B == (A|B)^B
+ std::swap(A, B);
+ if (B == Op1) // (A|B)^B == A & ~B
+ return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
+ } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
+ return ReplaceInstUsesWith(I, B); // (A^B)^A == B
+ } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
+ return ReplaceInstUsesWith(I, A); // (B^A)^A == B
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ Op0I->hasOneUse()){
+ if (A == Op1) // (A&B)^A -> (B&A)^A
+ std::swap(A, B);
+ if (B == Op1 && // (B&A)^A == ~B & A
+ !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);
+ }
+ }
+ }
+
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
+ Op0I->getOperand(1) == Op1I->getOperand(1) &&
+ (Op0I->hasOneUse() || Op1I->hasOneUse())) {
+ Value *NewOp =
+ Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
+ Op0I->getName());
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ Op1I->getOperand(1));
+ }
+
+ if (Op0I && Op1I) {
+ Value *A, *B, *C, *D;
+ // (A & B)^(A | B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A | B)^(A & B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if ((A == C && B == D) || (A == D && B == C))
+ return BinaryOperator::CreateXor(A, B);
+ }
+
+ // (A & B)^(C & D)
+ if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
+ match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ // (X & Y)^(X & Z) -> (Y^Z) & X
+ Value *X = 0, *Y = 0, *Z = 0;
+ if (A == C)
+ X = A, Y = B, Z = D;
+ else if (A == D)
+ X = A, Y = B, Z = C;
+ else if (B == C)
+ X = B, Y = A, Z = D;
+ else if (B == D)
+ X = B, Y = A, Z = C;
+
+ if (X) {
+ Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
+ return BinaryOperator::CreateAnd(NewOp, X);
+ }
+ }
+ }
+
+ // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
+ if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
+ if (LHS->getOperand(0) == RHS->getOperand(1) &&
+ LHS->getOperand(1) == RHS->getOperand(0))
+ LHS->swapOperands();
+ if (LHS->getOperand(0) == RHS->getOperand(0) &&
+ LHS->getOperand(1) == RHS->getOperand(1)) {
+ Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
+ unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ bool isSigned = LHS->isSigned() || RHS->isSigned();
+ Value *RV = getICmpValue(isSigned, Code, Op0, Op1);
+ if (Instruction *I = dyn_cast<Instruction>(RV))
+ return I;
+ // Otherwise, it's a constant boolean value.
+ return ReplaceInstUsesWith(I, RV);
+ }
+ }
+
+ // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
+ if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ // Only do this if the casts both really cause code to be generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType()) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType())) {
+ Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
+ Op1C->getOperand(0), I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
+ }
+ }
+
+ return Changed ? &I : 0;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
new file mode 100644
index 0000000..47c37c4
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -0,0 +1,1142 @@
+//===- InstCombineCalls.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visitCall and visitInvoke functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstCombine.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+using namespace llvm;
+
+/// getPromotedType - Return the specified type promoted as it would be to pass
+/// through a va_arg area.
+static const Type *getPromotedType(const Type *Ty) {
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
+ if (ITy->getBitWidth() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+ }
+ return Ty;
+}
+
+/// EnforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned EnforceKnownAlignment(Value *V,
+ unsigned Align, unsigned PrefAlign) {
+
+ User *U = dyn_cast<User>(V);
+ if (!U) return Align;
+
+ switch (Operator::getOpcode(U)) {
+ default: break;
+ case Instruction::BitCast:
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ case Instruction::GetElementPtr: {
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+
+ if (AllZeroOperands) {
+ // Treat this like a bitcast.
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ }
+ break;
+ }
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (!GV->isDeclaration()) {
+ if (GV->getAlignment() >= PrefAlign)
+ Align = GV->getAlignment();
+ else {
+ GV->setAlignment(PrefAlign);
+ Align = PrefAlign;
+ }
+ }
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign) + Align = AI->getAlignment(); + else { + AI->setAlignment(PrefAlign); + Align = PrefAlign; + } + } + + return Align; +} + +/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, + unsigned PrefAlign) { + unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : + sizeof(PrefAlign) * CHAR_BIT; + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne); + unsigned TrailZ = KnownZero.countTrailingOnes(); + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + + if (PrefAlign > Align) + Align = EnforceKnownAlignment(V, Align, PrefAlign); + + // We don't need to make any adjustment. + return Align; +} + +Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { + unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); + unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); + unsigned MinAlign = std::min(DstAlign, SrcAlign); + unsigned CopyAlign = MI->getAlignment(); + + if (CopyAlign < MinAlign) { + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + MinAlign, false)); + return MI; + } + + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with + // load/store. + ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3)); + if (MemOpLength == 0) return 0; + + // Source and destination pointer types are always "i8*" for intrinsic. See + // if the size is something we can handle with a single primitive load/store. + // A single load+store correctly handles overlapping memory in the memmove + // case. + unsigned Size = MemOpLength->getZExtValue(); + if (Size == 0) return MI; // Delete this mem transfer. + + if (Size > 8 || (Size&(Size-1))) + return 0; // If not 1/2/4/8 bytes, exit. + + // Use an integer load+store unless we can find something better. + Type *NewPtrTy = + PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3)); + + // Memcpy forces the use of i8* for the source and destination. That means + // that if you're using memcpy to move one double around, you'll get a cast + // from double* to i8*. We'd much rather use a double load+store rather than + // an i64 load+store, here because this improves the odds that the source or + // dest address will be promotable. See if we can find a better type than the + // integer datatype. + Value *StrippedDest = MI->getOperand(1)->stripPointerCasts(); + if (StrippedDest != MI->getOperand(1)) { + const Type *SrcETy = cast<PointerType>(StrippedDest->getType()) + ->getElementType(); + if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { + // The SrcETy might be something like {{{double}}} or [1 x double]. Rip + // down through these levels if so. 
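+ // (e.g. a {{{double}}} struct holds exactly one double, so loading it
+ // as a double keeps the value in an FP register instead of forcing an
+ // i64 load+store)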
+ while (!SrcETy->isSingleValueType()) { + if (const StructType *STy = dyn_cast<StructType>(SrcETy)) { + if (STy->getNumElements() == 1) + SrcETy = STy->getElementType(0); + else + break; + } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { + if (ATy->getNumElements() == 1) + SrcETy = ATy->getElementType(); + else + break; + } else + break; + } + + if (SrcETy->isSingleValueType()) + NewPtrTy = PointerType::getUnqual(SrcETy); + } + } + + + // If the memcpy/memmove provides better alignment info than we can + // infer, use it. + SrcAlign = std::max(SrcAlign, CopyAlign); + DstAlign = std::max(DstAlign, CopyAlign); + + Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); + Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); + InsertNewInstBefore(L, *MI); + InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); + return MI; +} + +Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { + unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); + if (MI->getAlignment() < Alignment) { + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + Alignment, false)); + return MI; + } + + // Extract the length and alignment and fill if they are constant. + ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); + ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); + if (!LenC || !FillC || !FillC->getType()->isInteger(8)) + return 0; + uint64_t Len = LenC->getZExtValue(); + Alignment = MI->getAlignment(); + + // If the length is zero, this is a no-op + if (Len == 0) return MI; // memset(d,c,0,a) -> noop + + // memset(s,c,n) -> store s, c (for n=1,2,4,8) + if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { + const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. + + Value *Dest = MI->getDest(); + Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); + + // Alignment 0 is identity for alignment 1 for memset, but not store. + if (Alignment == 0) Alignment = 1; + + // Extract the fill value and store. + uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; + InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), + Dest, false, Alignment), *MI); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setLength(Constant::getNullValue(LenC->getType())); + return MI; + } + + return 0; +} + + +/// visitCallInst - CallInst simplification. This mostly only handles folding +/// of intrinsic instructions. For normal calls, it allows visitCallSite to do +/// the heavy lifting. +/// +Instruction *InstCombiner::visitCallInst(CallInst &CI) { + if (isFreeCall(&CI)) + return visitFree(CI); + + // If the caller function is nounwind, mark the call as nounwind, even if the + // callee isn't. + if (CI.getParent()->getParent()->doesNotThrow() && + !CI.doesNotThrow()) { + CI.setDoesNotThrow(); + return &CI; + } + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); + if (!II) return visitCallSite(&CI); + + // Intrinsics cannot occur in an invoke, so handle them here instead of in + // visitCallSite. + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { + bool Changed = false; + + // memmove/cpy/set of zero bytes is a noop. 
+ if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { + if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) + if (CI->getZExtValue() == 1) { + // Replace the instruction with just byte operations. We would + // transform other cases to loads/stores, but we don't know if + // alignment is sufficient. + } + } + + // If we have a memmove and the source operation is a constant global, + // then the source and dest pointers can't alias, so we can change this + // into a call to memcpy. + if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { + if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource())) + if (GVSrc->isConstant()) { + Module *M = CI.getParent()->getParent()->getParent(); + Intrinsic::ID MemCpyID = Intrinsic::memcpy; + const Type *Tys[1]; + Tys[0] = CI.getOperand(3)->getType(); + CI.setOperand(0, + Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); + Changed = true; + } + } + + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { + // memmove(x,x,size) -> noop. + if (MTI->getSource() == MTI->getDest()) + return EraseInstFromFunction(CI); + } + + // If we can determine a pointer alignment that is bigger than currently + // set, update the alignment. + if (isa<MemTransferInst>(MI)) { + if (Instruction *I = SimplifyMemTransfer(MI)) + return I; + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { + if (Instruction *I = SimplifyMemSet(MSI)) + return I; + } + + if (Changed) return II; + } + + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::bswap: + // bswap(bswap(x)) -> x + if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) + if (Operand->getIntrinsicID() == Intrinsic::bswap) + return ReplaceInstUsesWith(CI, Operand->getOperand(1)); + + // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) + if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) { + if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) + if (Operand->getIntrinsicID() == Intrinsic::bswap) { + unsigned C = Operand->getType()->getPrimitiveSizeInBits() - + TI->getType()->getPrimitiveSizeInBits(); + Value *CV = ConstantInt::get(Operand->getType(), C); + Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); + return new TruncInst(V, TI->getType()); + } + } + + break; + case Intrinsic::powi: + if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); + // powi(x, 1) -> x + if (Power->isOne()) + return ReplaceInstUsesWith(CI, II->getOperand(1)); + // powi(x, -1) -> 1/x + if (Power->isAllOnesValue()) + return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), + II->getOperand(1)); + } + break; + case Intrinsic::cttz: { + // If all bits below the first known one are known zero, + // this value is constant. 
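+ // (e.g. if bit 2 of the operand is known one and bits 0-1 are known
+ // zero, cttz is provably 2 regardless of the remaining bits)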
+ const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne); + unsigned TrailingZeros = KnownOne.countTrailingZeros(); + APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); + if ((Mask & KnownZero) == Mask) + return ReplaceInstUsesWith(CI, ConstantInt::get(IT, + APInt(BitWidth, TrailingZeros))); + + } + break; + case Intrinsic::ctlz: { + // If all bits above the first known one are known zero, + // this value is constant. + const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne); + unsigned LeadingZeros = KnownOne.countLeadingZeros(); + APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); + if ((Mask & KnownZero) == Mask) + return ReplaceInstUsesWith(CI, ConstantInt::get(IT, + APInt(BitWidth, LeadingZeros))); + + } + break; + case Intrinsic::uadd_with_overflow: { + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt Mask = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; + bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; + + if (LHSKnownNegative || LHSKnownPositive) { + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; + bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; + if (LHSKnownNegative && RHSKnownNegative) { + // The sign bit is set in both cases: this MUST overflow. + // Create a simple add instruction, and insert it into the struct. + Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); + Worklist.Add(Add); + Constant *V[] = { + UndefValue::get(LHS->getType()),ConstantInt::getTrue(II->getContext()) + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, Add, 0); + } + + if (LHSKnownPositive && RHSKnownPositive) { + // The sign bit is clear in both cases: this CANNOT overflow. + // Create a simple add instruction, and insert it into the struct. + Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); + Worklist.Add(Add); + Constant *V[] = { + UndefValue::get(LHS->getType()), + ConstantInt::getFalse(II->getContext()) + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, Add, 0); + } + } + } + // FALL THROUGH uadd into sadd + case Intrinsic::sadd_with_overflow: + // Canonicalize constants into the RHS. 
+ if (isa<Constant>(II->getOperand(1)) &&
+ !isa<Constant>(II->getOperand(2))) {
+ Value *LHS = II->getOperand(1);
+ II->setOperand(1, II->getOperand(2));
+ II->setOperand(2, LHS);
+ return II;
+ }
+
+ // X + undef -> undef
+ if (isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X + 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // undef - X -> undef
+ // X - undef -> undef
+ if (isa<UndefValue>(II->getOperand(1)) ||
+ isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X - 0 -> {X, false}
+ if (RHS->isZero()) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ // Canonicalize constants into the RHS.
+ if (isa<Constant>(II->getOperand(1)) &&
+ !isa<Constant>(II->getOperand(2))) {
+ Value *LHS = II->getOperand(1);
+ II->setOperand(1, II->getOperand(2));
+ II->setOperand(2, LHS);
+ return II;
+ }
+
+ // X * undef -> undef
+ if (isa<UndefValue>(II->getOperand(2)))
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ // X*0 -> {0, false}
+ if (RHSI->isZero())
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+
+ // X * 1 -> {X, false}
+ if (RHSI->equalsInt(1)) {
+ Constant *V[] = {
+ UndefValue::get(II->getOperand(1)->getType()),
+ ConstantInt::getFalse(II->getContext())
+ };
+ Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ }
+ }
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
+ return new StoreInst(II->getOperand(1), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
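+ // (the unaligned-store intrinsics make no alignment promise, so once
+ // 16-byte alignment is proven, a plain store is equivalent and easier
+ // for later passes to reason about)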
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(2)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvttss2si: {
+ // These intrinsics only demand the 0th element of their input vector. If
+ // we can simplify the input based on that, do so now.
+ unsigned VWidth =
+ cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+ APInt DemandedElts(VWidth, 1);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ UndefElts)) {
+ II->setOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
+ }
+
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+ Value *Result = UndefValue::get(Op0->getType());
+
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
+
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
+
+ if (ExtractedElts[Idx] == 0) {
+ ExtractedElts[Idx] =
+ Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ Idx&15, false), "tmp");
+ }
+
+ // Insert this value into the result vector.
+ Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ i, false), "tmp");
+ }
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
+ }
+ }
+ break;
+
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
+ return EraseInstFromFunction(CI);
+ }
+ }
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ // If there is a stackrestore below this one, remove this one.
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return EraseInstFromFunction(CI);
+ // Otherwise, ignore the intrinsic.
+ } else {
+ // If we found a non-intrinsic call, we can't remove the stack
+ // restore.
+ CannotRemove = true;
+ break;
+ }
+ }
+ }
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *Const = cast<ConstantInt>(II->getOperand(2));
+ const Type *Ty = CI.getType();
+
+ // 0 is maximum number of bytes left, 1 is minimum number of bytes left.
+ // TODO: actually add these values, the current return values are "don't
+ // know".
+ if (Const->getZExtValue() == 0)
+ return ReplaceInstUsesWith(CI, Constant::getAllOnesValue(Ty));
+ else
+ return ReplaceInstUsesWith(CI, ConstantInt::get(Ty, 0));
+ }
+ }
+
+ return visitCallSite(II);
+}
+
+// InvokeInst simplification
+//
+Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
+ return visitCallSite(&II);
+}
+
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ return false;
+ return true;
+}
+
+// visitCallSite - Improvements for call and invoke instructions.
+//
+Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ bool Changed = false;
+
+ // If the callee is a constexpr cast of a function, attempt to move the cast
+ // to the arguments of the call/invoke.
+ if (transformConstExprCastCall(CS)) return 0;
+
+ Value *Callee = CS.getCalledValue();
+
+ if (Function *CalleeF = dyn_cast<Function>(Callee))
+ if (CalleeF->getCallingConv() != CS.getCallingConv()) {
+ Instruction *OldCall = CS.getInstruction();
+ // If the call and callee calling conventions don't match, this call must
+ // be unreachable, as the call is undefined.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ OldCall);
+ // If OldCall does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!OldCall->getType()->isVoidTy())
+ OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
+ if (isa<CallInst>(OldCall)) // Not worth removing an invoke here.
+ return EraseInstFromFunction(*OldCall);
+ return 0;
+ }
+
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ // This instruction is not reachable, just remove it. We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+
+ // If CS does not return void then replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!CS.getInstruction()->getType()->isVoidTy()) + CS.getInstruction()-> + replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType())); + + if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { + // Don't break the CFG, insert a dummy cond branch. + BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), + ConstantInt::getTrue(Callee->getContext()), II); + } + return EraseInstFromFunction(*CS.getInstruction()); + } + + if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) + if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) + if (In->getIntrinsicID() == Intrinsic::init_trampoline) + return transformCallThroughTrampoline(CS); + + const PointerType *PTy = cast<PointerType>(Callee->getType()); + const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + if (FTy->isVarArg()) { + int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); + // See if we can optimize any arguments passed through the varargs area of + // the call. + for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), + E = CS.arg_end(); I != E; ++I, ++ix) { + CastInst *CI = dyn_cast<CastInst>(*I); + if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) { + *I = CI->getOperand(0); + Changed = true; + } + } + } + + if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) { + // Inline asm calls cannot throw - mark them 'nounwind'. + CS.setDoesNotThrow(); + Changed = true; + } + + return Changed ? CS.getInstruction() : 0; +} + +// transformConstExprCastCall - If the callee is a constexpr cast of a function, +// attempt to move the cast to the arguments of the call/invoke. +// +bool InstCombiner::transformConstExprCastCall(CallSite CS) { + if (!isa<ConstantExpr>(CS.getCalledValue())) return false; + ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue()); + if (CE->getOpcode() != Instruction::BitCast || + !isa<Function>(CE->getOperand(0))) + return false; + Function *Callee = cast<Function>(CE->getOperand(0)); + Instruction *Caller = CS.getInstruction(); + const AttrListPtr &CallerPAL = CS.getAttributes(); + + // Okay, this is a cast from a function to a different type. Unless doing so + // would cause a type conversion of one of our arguments, change this call to + // be a direct call with arguments casted to the appropriate types. + // + const FunctionType *FT = Callee->getFunctionType(); + const Type *OldRetTy = Caller->getType(); + const Type *NewRetTy = FT->getReturnType(); + + if (isa<StructType>(NewRetTy)) + return false; // TODO: Handle multiple return values. + + // Check to see if we are changing the return type... + if (OldRetTy != NewRetTy) { + if (Callee->isDeclaration() && + // Conversion is ok if changing from one pointer type to another or from + // a pointer to an integer of the same size. + !((isa<PointerType>(OldRetTy) || !TD || + OldRetTy == TD->getIntPtrType(Caller->getContext())) && + (isa<PointerType>(NewRetTy) || !TD || + NewRetTy == TD->getIntPtrType(Caller->getContext())))) + return false; // Cannot transform this return value. + + if (!Caller->use_empty() && + // void -> non-void is handled specially + !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) + return false; // Cannot transform this return value. + + if (!CallerPAL.isEmpty() && !Caller->use_empty()) { + Attributes RAttrs = CallerPAL.getRetAttributes(); + if (RAttrs & Attribute::typeIncompatible(NewRetTy)) + return false; // Attribute not compatible with transformed value. 
+ } + + // If the callsite is an invoke instruction, and the return value is used by + // a PHI node in a successor, we cannot change the return type of the call + // because there is no place to put the cast instruction (without breaking + // the critical edge). Bail out in this case. + if (!Caller->use_empty()) + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) + if (PHINode *PN = dyn_cast<PHINode>(*UI)) + if (PN->getParent() == II->getNormalDest() || + PN->getParent() == II->getUnwindDest()) + return false; + } + + unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); + + CallSite::arg_iterator AI = CS.arg_begin(); + for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { + const Type *ParamTy = FT->getParamType(i); + const Type *ActTy = (*AI)->getType(); + + if (!CastInst::isCastable(ActTy, ParamTy)) + return false; // Cannot transform this parameter value. + + if (CallerPAL.getParamAttributes(i + 1) + & Attribute::typeIncompatible(ParamTy)) + return false; // Attribute not compatible with transformed value. + + // Converting from one pointer type to another or between a pointer and an + // integer of the same size is safe even if we do not have a body. + bool isConvertible = ActTy == ParamTy || + (TD && ((isa<PointerType>(ParamTy) || + ParamTy == TD->getIntPtrType(Caller->getContext())) && + (isa<PointerType>(ActTy) || + ActTy == TD->getIntPtrType(Caller->getContext())))); + if (Callee->isDeclaration() && !isConvertible) return false; + } + + if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && + Callee->isDeclaration()) + return false; // Do not delete arguments unless we have a function body. + + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && + !CallerPAL.isEmpty()) + // In this case we have more arguments than the new function type, but we + // won't be dropping them. Check that these extra arguments have attributes + // that are compatible with being a vararg call argument. + for (unsigned i = CallerPAL.getNumSlots(); i; --i) { + if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) + break; + Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs; + if (PAttrs & Attribute::VarArgsIncompatible) + return false; + } + + // Okay, we decided that this is a safe thing to do: go ahead and start + // inserting cast instructions as necessary... + std::vector<Value*> Args; + Args.reserve(NumActualArgs); + SmallVector<AttributeWithIndex, 8> attrVec; + attrVec.reserve(NumCommonArgs); + + // Get any return attributes. + Attributes RAttrs = CallerPAL.getRetAttributes(); + + // If the return value is not being used, the type may not be compatible + // with the existing attributes. Wipe out any problematic attributes. + RAttrs &= ~Attribute::typeIncompatible(NewRetTy); + + // Add the new return attributes. + if (RAttrs) + attrVec.push_back(AttributeWithIndex::get(0, RAttrs)); + + AI = CS.arg_begin(); + for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { + const Type *ParamTy = FT->getParamType(i); + if ((*AI)->getType() == ParamTy) { + Args.push_back(*AI); + } else { + Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, + false, ParamTy, false); + Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); + } + + // Add any parameter attributes. 
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+
+ // If the function takes more arguments than the call was taking, add them
+ // now.
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+
+ // If we are removing arguments to the function, emit an obnoxious warning.
+ if (FT->getNumParams() < NumActualArgs) {
+ if (!FT->isVarArg()) {
+ errs() << "WARNING: While resolving call to function '"
+ << Callee->getName() << "' arguments were dropped!\n";
+ } else {
+ // Add all of the arguments in their promoted form to the arg list.
+ for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
+ const Type *PTy = getPromotedType((*AI)->getType());
+ if (PTy != (*AI)->getType()) {
+ // Must promote to pass through va_arg area!
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(*AI, false, PTy, false);
+ Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
+ } else {
+ Args.push_back(*AI);
+ }
+
+ // Add any parameter attributes.
+ if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+ attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ }
+ }
+ }
+
+ if (Attributes FnAttrs = CallerPAL.getFnAttributes())
+ attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+ if (NewRetTy->isVoidTy())
+ Caller->setName(""); // Void type should not have a name.
+
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
+ attrVec.end());
+
+ Instruction *NC;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+ cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ } else {
+ NC = CallInst::Create(Callee, Args.begin(), Args.end(),
+ Caller->getName(), Caller);
+ CallInst *CI = cast<CallInst>(Caller);
+ if (CI->isTailCall())
+ cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+ cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ }
+
+ // Insert a cast of the return type as necessary.
+ Value *NV = NC;
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
+ if (!NV->getType()->isVoidTy()) {
+ Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
+ OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
+
+ // If this is an invoke instruction, we should insert it after the first
+ // non-PHI instruction in the normal successor block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
+ InsertNewInstBefore(NC, *I);
+ } else {
+ // Otherwise, it's a call; just insert the cast right after the call
+ // instruction.
+ InsertNewInstBefore(NC, *Caller);
+ }
+ Worklist.AddUsersToWorkList(*Caller);
+ } else {
+ NV = UndefValue::get(Caller->getType());
+ }
+ }
+
+
+ if (!Caller->use_empty())
+ Caller->replaceAllUsesWith(NV);
+
+ EraseInstFromFunction(*Caller);
+ return true;
+}
+
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
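+// The nest argument recorded by init_trampoline is spliced into the call's
+// argument list at the parameter marked 'nest', after which the call can
+// target the underlying function directly.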
+// +Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { + Value *Callee = CS.getCalledValue(); + const PointerType *PTy = cast<PointerType>(Callee->getType()); + const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + const AttrListPtr &Attrs = CS.getAttributes(); + + // If the call already has the 'nest' attribute somewhere then give up - + // otherwise 'nest' would occur twice after splicing in the chain. + if (Attrs.hasAttrSomewhere(Attribute::Nest)) + return 0; + + IntrinsicInst *Tramp = + cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); + + Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts()); + const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); + const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); + + const AttrListPtr &NestAttrs = NestF->getAttributes(); + if (!NestAttrs.isEmpty()) { + unsigned NestIdx = 1; + const Type *NestTy = 0; + Attributes NestAttr = Attribute::None; + + // Look for a parameter marked with the 'nest' attribute. + for (FunctionType::param_iterator I = NestFTy->param_begin(), + E = NestFTy->param_end(); I != E; ++NestIdx, ++I) + if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) { + // Record the parameter type and any other attributes. + NestTy = *I; + NestAttr = NestAttrs.getParamAttributes(NestIdx); + break; + } + + if (NestTy) { + Instruction *Caller = CS.getInstruction(); + std::vector<Value*> NewArgs; + NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); + + SmallVector<AttributeWithIndex, 8> NewAttrs; + NewAttrs.reserve(Attrs.getNumSlots() + 1); + + // Insert the nest argument into the call argument list, which may + // mean appending it. Likewise for attributes. + + // Add any result attributes. + if (Attributes Attr = Attrs.getRetAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(0, Attr)); + + { + unsigned Idx = 1; + CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + do { + if (Idx == NestIdx) { + // Add the chain argument and attributes. + Value *NestVal = Tramp->getOperand(3); + if (NestVal->getType() != NestTy) + NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); + NewArgs.push_back(NestVal); + NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); + } + + if (I == E) + break; + + // Add the original argument and attributes. + NewArgs.push_back(*I); + if (Attributes Attr = Attrs.getParamAttributes(Idx)) + NewAttrs.push_back + (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); + + ++Idx, ++I; + } while (1); + } + + // Add any function attributes. + if (Attributes Attr = Attrs.getFnAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); + + // The trampoline may have been bitcast to a bogus type (FTy). + // Handle this by synthesizing a new function type, equal to FTy + // with the chain parameter inserted. + + std::vector<const Type*> NewTypes; + NewTypes.reserve(FTy->getNumParams()+1); + + // Insert the chain's type into the list of parameter types, which may + // mean appending it. + { + unsigned Idx = 1; + FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); + + do { + if (Idx == NestIdx) + // Add the chain's type. + NewTypes.push_back(NestTy); + + if (I == E) + break; + + // Add the original type. + NewTypes.push_back(*I); + + ++Idx, ++I; + } while (1); + } + + // Replace the trampoline call with a direct call. Let the generic + // code sort out any function type mismatches. 
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, + FTy->isVarArg()); + Constant *NewCallee = + NestF->getType() == PointerType::getUnqual(NewFTy) ? + NestF : ConstantExpr::getBitCast(NestF, + PointerType::getUnqual(NewFTy)); + const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), + NewAttrs.end()); + + Instruction *NewCaller; + if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { + NewCaller = InvokeInst::Create(NewCallee, + II->getNormalDest(), II->getUnwindDest(), + NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); + cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); + } else { + NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), + Caller->getName(), Caller); + if (cast<CallInst>(Caller)->isTailCall()) + cast<CallInst>(NewCaller)->setTailCall(); + cast<CallInst>(NewCaller)-> + setCallingConv(cast<CallInst>(Caller)->getCallingConv()); + cast<CallInst>(NewCaller)->setAttributes(NewPAL); + } + if (!Caller->getType()->isVoidTy()) + Caller->replaceAllUsesWith(NewCaller); + Caller->eraseFromParent(); + Worklist.Remove(Caller); + return 0; + } + } + + // Replace the trampoline call with a direct call. Since there is no 'nest' + // parameter, there is no need to adjust the argument list. Let the generic + // code sort out any function type mismatches. + Constant *NewCallee = + NestF->getType() == PTy ? NestF : + ConstantExpr::getBitCast(NestF, PTy); + CS.setCalledFunction(NewCallee); + return CS.getInstruction(); +} + diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp new file mode 100644 index 0000000..e018b35 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -0,0 +1,1301 @@ +//===- InstCombineCasts.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for cast operations. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear +/// expression. If so, decompose it, returning some value X, such that Val is +/// X*Scale+Offset. +/// +static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, + int &Offset) { + assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + Offset = CI->getZExtValue(); + Scale = 0; + return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0); + } + + if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (I->getOpcode() == Instruction::Shl) { + // This is a value scaled by '1 << the shift amt'. + Scale = 1U << RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } + + if (I->getOpcode() == Instruction::Mul) { + // This value is scaled by 'RHS'. + Scale = RHS->getZExtValue(); + Offset = 0; + return I->getOperand(0); + } + + if (I->getOpcode() == Instruction::Add) { + // We have X+C. 
Check to see if we really have (X*C2)+C1, + // where C1 is divisible by C2. + unsigned SubScale; + Value *SubVal = + DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + Offset += RHS->getZExtValue(); + Scale = SubScale; + return SubVal; + } + } + } + + // Otherwise, we can't look past this. + Scale = 1; + Offset = 0; + return Val; +} + +/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, +/// try to eliminate the cast by moving the type information into the alloc. +Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, + AllocaInst &AI) { + // This requires TargetData to get the alloca alignment and size information. + if (!TD) return 0; + + const PointerType *PTy = cast<PointerType>(CI.getType()); + + BuilderTy AllocaBuilder(*Builder); + AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + + // Get the type really allocated and the type casted to. + const Type *AllocElTy = AI.getAllocatedType(); + const Type *CastElTy = PTy->getElementType(); + if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; + + unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); + unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy); + if (CastElTyAlign < AllocElTyAlign) return 0; + + // If the allocation has multiple uses, only promote it if we are strictly + // increasing the alignment of the resultant allocation. If we keep it the + // same, we open the door to infinite loops of various kinds. (A reference + // from a dbg.declare doesn't count as a use for this purpose.) + if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && + CastElTyAlign == AllocElTyAlign) return 0; + + uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); + uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); + if (CastElTySize == 0 || AllocElTySize == 0) return 0; + + // See if we can satisfy the modulus by pulling a scale out of the array + // size argument. + unsigned ArraySizeScale; + int ArrayOffset; + Value *NumElements = // See if the array size is a decomposable linear expr. + DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); + + // If we can now satisfy the modulus, by using a non-1 scale, we really can + // do the xform. + if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || + (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; + + unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; + Value *Amt = 0; + if (Scale == 1) { + Amt = NumElements; + } else { + Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale); + // Insert before the alloca, not before the cast. + Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); + } + + if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { + Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()), + Offset, true); + Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); + } + + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); + New->setAlignment(AI.getAlignment()); + New->takeName(&AI); + + // If the allocation has one real use plus a dbg.declare, just remove the + // declare. + if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) { + EraseInstFromFunction(*(Instruction*)DI); + } + // If the allocation has multiple real uses, insert a cast and change all + // things that used it to use the new cast. This will also hack on CI, but it + // will die soon. + else if (!AI.hasOneUse()) { + // New is the allocation instruction, pointer typed. AI is the original + // allocation instruction, also pointer typed. Thus, cast to use is BitCast. 
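+    // For example (illustrative, assuming i64 has a stricter ABI alignment
+    // than the array on this target): if %a is "alloca [2 x i32]" and CI is
+    // "bitcast [2 x i32]* %a to i64*", the new allocation is "alloca i64";
+    // the bitcast created below converts it back to [2 x i32]* so that %a's
+    // other users keep working.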
+ Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); + AI.replaceAllUsesWith(NewCast); + } + return ReplaceInstUsesWith(CI, New); +} + + + +/// EvaluateInDifferentType - Given an expression that +/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually +/// insert the code to evaluate the expression. +Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, + bool isSigned) { + if (Constant *C = dyn_cast<Constant>(V)) { + C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); + // If we got a constantexpr back, try to simplify it with TD info. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + C = ConstantFoldConstantExpression(CE, TD); + return C; + } + + // Otherwise, it must be an instruction. + Instruction *I = cast<Instruction>(V); + Instruction *Res = 0; + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::AShr: + case Instruction::LShr: + case Instruction::Shl: + case Instruction::UDiv: + case Instruction::URem: { + Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); + Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); + Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + // If the source type of the cast is the type we're trying for then we can + // just return the source. There's no need to insert it because it is not + // new. + if (I->getOperand(0)->getType() == Ty) + return I->getOperand(0); + + // Otherwise, must be the same type of cast, so just reinsert a new one. + // This also handles the case of zext(trunc(x)) -> zext(x). + Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, + Opc == Instruction::SExt); + break; + case Instruction::Select: { + Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); + Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned); + Res = SelectInst::Create(I->getOperand(0), True, False); + break; + } + case Instruction::PHI: { + PHINode *OPN = cast<PHINode>(I); + PHINode *NPN = PHINode::Create(Ty); + for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { + Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); + NPN->addIncoming(V, OPN->getIncomingBlock(i)); + } + Res = NPN; + break; + } + default: + // TODO: Can handle more cases here. + llvm_unreachable("Unreachable!"); + break; + } + + Res->takeName(I); + return InsertNewInstBefore(Res, *I); +} + + +/// This function is a wrapper around CastInst::isEliminableCastPair. It +/// simply extracts arguments and returns what that function returns. +static Instruction::CastOps +isEliminableCastPair( + const CastInst *CI, ///< The first cast instruction + unsigned opcode, ///< The opcode of the second cast instruction + const Type *DstTy, ///< The target type for the second cast instruction + TargetData *TD ///< The target data for pointer size +) { + + const Type *SrcTy = CI->getOperand(0)->getType(); // A from above + const Type *MidTy = CI->getType(); // B from above + + // Get the opcodes of the two Cast instructions + Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); + Instruction::CastOps secondOp = Instruction::CastOps(opcode); + + unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, + DstTy, + TD ? 
TD->getIntPtrType(CI->getContext()) : 0); + + // We don't want to form an inttoptr or ptrtoint that converts to an integer + // type that differs from the pointer size. + if ((Res == Instruction::IntToPtr && + (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || + (Res == Instruction::PtrToInt && + (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) + Res = 0; + + return Instruction::CastOps(Res); +} + +/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results +/// in any code being generated. It does not require codegen if V is simple +/// enough or if the cast can be folded into other casts. +bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty) { + if (V->getType() == Ty || isa<Constant>(V)) return false; + + // If this is another cast that can be eliminated, it isn't codegen either. + if (const CastInst *CI = dyn_cast<CastInst>(V)) + if (isEliminableCastPair(CI, opcode, Ty, TD)) + return false; + return true; +} + + +/// @brief Implement the transforms common to all CastInst visitors. +Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { + Value *Src = CI.getOperand(0); + + // Many cases of "cast of a cast" are eliminable. If it's eliminable we just + // eliminate it now. + if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast + if (Instruction::CastOps opc = + isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { + // The first cast (CSrc) is eliminable so we need to fix up or replace + // the second cast (CI). CSrc will then have a good chance of being dead. + return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); + } + } + + // If we are casting a select then fold the cast into the select + if (SelectInst *SI = dyn_cast<SelectInst>(Src)) + if (Instruction *NV = FoldOpIntoSelect(CI, SI)) + return NV; + + // If we are casting a PHI then fold the cast into the PHI + if (isa<PHINode>(Src)) { + // We don't do this if this would create a PHI node with an illegal type if + // it is currently legal. + if (!isa<IntegerType>(Src->getType()) || + !isa<IntegerType>(CI.getType()) || + ShouldChangeType(CI.getType(), Src->getType())) + if (Instruction *NV = FoldOpIntoPhi(CI)) + return NV; + } + + return 0; +} + +/// CanEvaluateTruncated - Return true if we can evaluate the specified +/// expression tree as type Ty instead of its larger type, and arrive with the +/// same value. This is used by code that tries to eliminate truncates. +/// +/// Ty will always be a type smaller than V. We should return true if trunc(V) +/// can be computed by computing V in the smaller type. If V is an instruction, +/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only +/// makes sense if x and y can be efficiently truncated. +/// +/// This function works on both vectors and scalars. +/// +static bool CanEvaluateTruncated(Value *V, const Type *Ty) { + // We can always evaluate constants in another type. + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + const Type *OrigTy = V->getType(); + + // If this is an extension from the dest type, we can eliminate it, even if it + // has multiple uses. + if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && + I->getOperand(0)->getType() == Ty) + return true; + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. 
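+  // For example (illustrative): if "%a = add i64 %x, 1" also feeds a use
+  // that still needs the i64 value, rebuilding the add as an i32 operation
+  // would leave both the wide and the narrow computation alive.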
+ if (!I->hasOneUse()) return false; + + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // These operators can all arbitrarily be extended or truncated. + return CanEvaluateTruncated(I->getOperand(0), Ty) && + CanEvaluateTruncated(I->getOperand(1), Ty); + + case Instruction::UDiv: + case Instruction::URem: { + // UDiv and URem can be truncated if all the truncated bits are zero. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (BitWidth < OrigBitWidth) { + APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); + if (MaskedValueIsZero(I->getOperand(0), Mask) && + MaskedValueIsZero(I->getOperand(1), Mask)) { + return CanEvaluateTruncated(I->getOperand(0), Ty) && + CanEvaluateTruncated(I->getOperand(1), Ty); + } + } + break; + } + case Instruction::Shl: + // If we are truncating the result of this SHL, and if it's a shift of a + // constant amount, we can always perform a SHL in a smaller type. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (CI->getLimitedValue(BitWidth) < BitWidth) + return CanEvaluateTruncated(I->getOperand(0), Ty); + } + break; + case Instruction::LShr: + // If this is a truncate of a logical shr, we can truncate it to a smaller + // lshr iff we know that the bits we would otherwise be shifting in are + // already zeros. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(0), + APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && + CI->getLimitedValue(BitWidth) < BitWidth) { + return CanEvaluateTruncated(I->getOperand(0), Ty); + } + } + break; + case Instruction::Trunc: + // trunc(trunc(x)) -> trunc(x) + return true; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + return CanEvaluateTruncated(SI->getTrueValue(), Ty) && + CanEvaluateTruncated(SI->getFalseValue(), Ty); + } + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty)) + return false; + return true; + } + default: + // TODO: Can handle more cases here. + break; + } + + return false; +} + +Instruction *InstCombiner::visitTrunc(TruncInst &CI) { + if (Instruction *Result = commonCastTransforms(CI)) + return Result; + + // See if we can simplify any instructions used by the input whose sole + // purpose is to compute bits we don't care about. + if (SimplifyDemandedInstructionBits(CI)) + return &CI; + + Value *Src = CI.getOperand(0); + const Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + + // Attempt to truncate the entire input expression tree to the destination + // type. Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. 
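+  // For example (an illustrative sketch):
+  //   %a = zext i32 %x to i64
+  //   %b = add i64 %a, 15
+  //   %c = trunc i64 %b to i32
+  // becomes an add computed directly in the narrow type:
+  //   %c = add i32 %x, 15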
+  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+      CanEvaluateTruncated(Src, DestTy)) {
+
+    // If this cast is a truncate, evaluating in a different type always
+    // eliminates the cast, so it is always a win.
+    DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+          " to avoid cast: " << CI);
+    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+    assert(Res->getType() == DestTy);
+    return ReplaceInstUsesWith(CI, Res);
+  }
+
+  // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
+  if (DestTy->getScalarSizeInBits() == 1) {
+    Constant *One = ConstantInt::get(Src->getType(), 1);
+    Src = Builder->CreateAnd(Src, One, "tmp");
+    Value *Zero = Constant::getNullValue(Src->getType());
+    return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
+  }
+
+  return 0;
+}
+
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+                                             bool DoXform) {
+  // If we are just checking for an icmp eq of a single bit and zext'ing it
+  // to an integer, then shift the bit to the appropriate place and then
+  // cast to integer to avoid the comparison.
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+    const APInt &Op1CV = Op1C->getValue();
+
+    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
+    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
+    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+      if (!DoXform) return ICI;
+
+      Value *In = ICI->getOperand(0);
+      Value *Sh = ConstantInt::get(In->getType(),
+                                   In->getType()->getScalarSizeInBits()-1);
+      In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
+      if (In->getType() != CI.getType())
+        In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+
+      if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+        Constant *One = ConstantInt::get(In->getType(), 1);
+        In = Builder->CreateXor(In, One, In->getName()+".not");
+      }
+
+      return ReplaceInstUsesWith(CI, In);
+    }
+
+    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
+    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
+    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
+    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
+    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
+    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
+    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+    if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+        // This only works for EQ and NE
+        ICI->isEquality()) {
+      // If Op1C is some other power of two, convert:
+      uint32_t BitWidth = Op1C->getType()->getBitWidth();
+      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+      ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+      APInt KnownZeroMask(~KnownZero);
+      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI; + + bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; + if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { + // (X&4) == 2 --> false + // (X&4) != 2 --> true + Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), + isNE); + Res = ConstantExpr::getZExt(Res, CI.getType()); + return ReplaceInstUsesWith(CI, Res); + } + + uint32_t ShiftAmt = KnownZeroMask.logBase2(); + Value *In = ICI->getOperand(0); + if (ShiftAmt) { + // Perform a logical shr by shiftamt. + // Insert the shift to put the result in the low bit. + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), + In->getName()+".lobit"); + } + + if ((Op1CV != 0) == isNE) { // Toggle the low bit. + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One, "tmp"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + else + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + } + } + } + + // icmp ne A, B is equal to xor A, B when A and B only really have one bit. + // It is also profitable to transform icmp eq into not(xor(A, B)) because that + // may lead to additional simplifications. + if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { + if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { + uint32_t BitWidth = ITy->getBitWidth(); + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); + APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + + if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { + APInt KnownBits = KnownZeroLHS | KnownOneLHS; + APInt UnknownBit = ~KnownBits; + if (UnknownBit.countPopulation() == 1) { + if (!DoXform) return ICI; + + Value *Result = Builder->CreateXor(LHS, RHS); + + // Mask off any bits that are set and won't be shifted away. + if (KnownOneLHS.uge(UnknownBit)) + Result = Builder->CreateAnd(Result, + ConstantInt::get(ITy, UnknownBit)); + + // Shift the bit we're testing down to the lsb. + Result = Builder->CreateLShr( + Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result->takeName(ICI); + return ReplaceInstUsesWith(CI, Result); + } + } + } + } + + return 0; +} + +/// CanEvaluateZExtd - Determine if the specified value can be computed in the +/// specified wider type and produce the same low bits. If not, return false. +/// +/// If this function returns true, it can also return a non-zero number of bits +/// (in BitsToClear) which indicates that the value it computes is correct for +/// the zero extend, but that the additional BitsToClear bits need to be zero'd +/// out. For example, to promote something like: +/// +/// %B = trunc i64 %A to i32 +/// %C = lshr i32 %B, 8 +/// %E = zext i32 %C to i64 +/// +/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be +/// set to 8 to indicate that the promoted value needs to have bits 24-31 +/// cleared in addition to bits 32-63. Since an 'and' will be generated to +/// clear the top bits anyway, doing this has no extra cost. +/// +/// This function works on both vectors and scalars. 
+static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) { + BitsToClear = 0; + if (isa<Constant>(V)) + return true; + + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // If the input is a truncate from the destination type, we can trivially + // eliminate it, even if it has multiple uses. + // FIXME: This is currently disabled until codegen can handle this without + // pessimizing code, PR5997. + if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) + return true; + + // We can't extend or shrink something that has multiple uses: doing so would + // require duplicating the instruction in general, which isn't profitable. + if (!I->hasOneUse()) return false; + + unsigned Opc = I->getOpcode(), Tmp; + switch (Opc) { + case Instruction::ZExt: // zext(zext(x)) -> zext(x). + case Instruction::SExt: // zext(sext(x)) -> sext(x). + case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x) + return true; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) || + !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp)) + return false; + // These can all be promoted if neither operand has 'bits to clear'. + if (BitsToClear == 0 && Tmp == 0) + return true; + + // If the operation is an AND/OR/XOR and the bits to clear are zero in the + // other side, BitsToClear is ok. + if (Tmp == 0 && + (Opc == Instruction::And || Opc == Instruction::Or || + Opc == Instruction::Xor)) { + // We use MaskedValueIsZero here for generality, but the case we care + // about the most is constant RHS. + unsigned VSize = V->getType()->getScalarSizeInBits(); + if (MaskedValueIsZero(I->getOperand(1), + APInt::getHighBitsSet(VSize, BitsToClear))) + return true; + } + + // Otherwise, we don't know how to analyze this BitsToClear case yet. + return false; + + case Instruction::LShr: + // We can promote lshr(x, cst) if we can promote x. This requires the + // ultimate 'and' to clear out the high zero bits we're clearing out though. + if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear)) + return false; + BitsToClear += Amt->getZExtValue(); + if (BitsToClear > V->getType()->getScalarSizeInBits()) + BitsToClear = V->getType()->getScalarSizeInBits(); + return true; + } + // Cannot promote variable LSHR. + return false; + case Instruction::Select: + if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) || + !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) || + // TODO: If important, we could handle the case when the BitsToClear are + // known zero in the disagreeing side. + Tmp != BitsToClear) + return false; + return true; + + case Instruction::PHI: { + // We can change a phi if we can change all operands. Note that we never + // get into trouble with cyclic PHIs here because we only consider + // instructions with a single use. + PHINode *PN = cast<PHINode>(I); + if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear)) + return false; + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) + if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) || + // TODO: If important, we could handle the case when the BitsToClear + // are known zero in the disagreeing input. + Tmp != BitsToClear) + return false; + return true; + } + default: + // TODO: Can handle more cases here. 
+    return false;
+  }
+}
+
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
+  // If this zero extend is only used by a truncate, let the truncate be
+  // eliminated before we try to optimize this zext.
+  if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+    return 0;
+
+  // If one of the common conversions will work, do it.
+  if (Instruction *Result = commonCastTransforms(CI))
+    return Result;
+
+  // See if we can simplify any instructions used by the input whose sole
+  // purpose is to compute bits we don't care about.
+  if (SimplifyDemandedInstructionBits(CI))
+    return &CI;
+
+  Value *Src = CI.getOperand(0);
+  const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+  // Attempt to extend the entire input expression tree to the destination
+  // type.  Only do this if the dest type is a simple type, don't convert the
+  // expression tree to something weird like i93 unless the source is also
+  // strange.
+  unsigned BitsToClear;
+  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+      CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+    assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
+           "Unreasonable BitsToClear");
+
+    // Okay, we can transform this!  Insert the new expression now.
+    DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+          " to avoid zero extend: " << CI);
+    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
+    assert(Res->getType() == DestTy);
+
+    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+    uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+    // If the high bits are already filled with zeros, just replace this
+    // cast with the result.
+    if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
+                                                     DestBitSize-SrcBitsKept)))
+      return ReplaceInstUsesWith(CI, Res);
+
+    // We need to emit an AND to clear the high bits.
+    Constant *C = ConstantInt::get(Res->getType(),
+                               APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
+    return BinaryOperator::CreateAnd(Res, C);
+  }
+
+  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
+  // types and if the sizes are just right we can convert this into a logical
+  // 'and' which will be much cheaper than the pair of casts.
+  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) {   // A->B->C cast
+    // TODO: Subsume this into EvaluateInDifferentType.
+
+    // Get the sizes of the types involved.  We know that the intermediate type
+    // will be smaller than A or C, but don't know the relation between A and C.
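+    // For example (illustrative, the SrcSize < DstSize case):
+    //   %t = trunc i32 %a to i16
+    //   %z = zext i16 %t to i64
+    // becomes a mask of the low bits followed by a single extension:
+    //   %m = and i32 %a, 65535
+    //   %z = zext i32 %m to i64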
+ Value *A = CSrc->getOperand(0); + unsigned SrcSize = A->getType()->getScalarSizeInBits(); + unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); + unsigned DstSize = CI.getType()->getScalarSizeInBits(); + // If we're actually extending zero bits, then if + // SrcSize < DstSize: zext(a & mask) + // SrcSize == DstSize: a & mask + // SrcSize > DstSize: trunc(a) & mask + if (SrcSize < DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + Constant *AndConst = ConstantInt::get(A->getType(), AndValue); + Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); + return new ZExtInst(And, CI.getType()); + } + + if (SrcSize == DstSize) { + APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); + return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), + AndValue)); + } + if (SrcSize > DstSize) { + Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); + APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); + return BinaryOperator::CreateAnd(Trunc, + ConstantInt::get(Trunc->getType(), + AndValue)); + } + } + + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) + return transformZExtICmp(ICI, CI); + + BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src); + if (SrcI && SrcI->getOpcode() == Instruction::Or) { + // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one + // of the (zext icmp) will be transformed. + ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0)); + ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1)); + if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && + (transformZExtICmp(LHS, CI, false) || + transformZExtICmp(RHS, CI, false))) { + Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); + return BinaryOperator::Create(Instruction::Or, LCast, RCast); + } + } + + // zext(trunc(t) & C) -> (t & zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) + return + BinaryOperator::CreateAnd(TI0, + ConstantExpr::getZExt(C, CI.getType())); + } + + // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0))) + if (And->getOpcode() == Instruction::And && And->hasOneUse() && + And->getOperand(1) == C) + if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) { + Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); + return BinaryOperator::CreateXor(NewAnd, ZC); + } + } + + // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1 + Value *X; + if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) && + match(SrcI, m_Not(m_Value(X))) && + (!X->hasOneUse() || !isa<CmpInst>(X))) { + Value *New = Builder->CreateZExt(X, CI.getType()); + return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); + } + + return 0; +} + +/// CanEvaluateSExtd - Return true if we can take the specified value +/// and return it as type Ty without inserting any new casts and without +/// changing the value of the common low bits. 
This is used by code that tries
+/// to promote integer operations to a wider type, which will allow us to
+/// eliminate the extension.
+///
+/// This function works on both vectors and scalars.
+///
+static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+         "Can't sign extend type to a smaller type");
+  // If this is a constant, it can be trivially promoted.
+  if (isa<Constant>(V))
+    return true;
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+
+  // If this is a truncate from the dest type, we can trivially eliminate it,
+  // even if it has multiple uses.
+  // FIXME: This is currently disabled until codegen can handle this without
+  // pessimizing code, PR5997.
+  if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+    return true;
+
+  // We can't extend or shrink something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+
+  switch (I->getOpcode()) {
+  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
+  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
+  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
+    return true;
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+    // These operators can all arbitrarily be extended if their inputs can.
+    return CanEvaluateSExtd(I->getOperand(0), Ty) &&
+           CanEvaluateSExtd(I->getOperand(1), Ty);
+
+  //case Instruction::Shl:   TODO
+  //case Instruction::LShr:  TODO
+
+  case Instruction::Select:
+    return CanEvaluateSExtd(I->getOperand(1), Ty) &&
+           CanEvaluateSExtd(I->getOperand(2), Ty);
+
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+    return true;
+  }
+  default:
+    // TODO: Can handle more cases here.
+    break;
+  }
+
+  return false;
+}
+
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+  // If this sign extend is only used by a truncate, let the truncate be
+  // eliminated before we try to optimize this sext.
+  if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+    return 0;
+
+  if (Instruction *I = commonCastTransforms(CI))
+    return I;
+
+  // See if we can simplify any instructions used by the input whose sole
+  // purpose is to compute bits we don't care about.
+  if (SimplifyDemandedInstructionBits(CI))
+    return &CI;
+
+  Value *Src = CI.getOperand(0);
+  const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+
+  // Canonicalize sign-extend from i1 to a select.
+  if (Src->getType()->isInteger(1))
+    return SelectInst::Create(Src,
+                              Constant::getAllOnesValue(CI.getType()),
+                              Constant::getNullValue(CI.getType()));
+
+  // Attempt to extend the entire input expression tree to the destination
+  // type.  Only do this if the dest type is a simple type, don't convert the
+  // expression tree to something weird like i93 unless the source is also
+  // strange.
+  if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+      CanEvaluateSExtd(Src, DestTy)) {
+    // Okay, we can transform this!  Insert the new expression now.
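+    // For example (an illustrative sketch):
+    //   %t = trunc i32 %i to i8
+    //   %a = add i8 %t, 1
+    //   %s = sext i8 %a to i32
+    // is evaluated directly in i32; if the result may not already be
+    // sign-extended, the shl+ashr pair emitted below finishes the job:
+    //   %a32 = add i32 %i, 1
+    //   %x   = shl i32 %a32, 24
+    //   %s   = ashr i32 %x, 24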
+    DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
+          " to avoid sign extend: " << CI);
+    Value *Res = EvaluateInDifferentType(Src, DestTy, true);
+    assert(Res->getType() == DestTy);
+
+    uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+    uint32_t DestBitSize = DestTy->getScalarSizeInBits();
+
+    // If the high bits are already filled with sign bit, just replace this
+    // cast with the result.
+    if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+      return ReplaceInstUsesWith(CI, Res);
+
+    // We need to emit a shl + ashr to do the sign extend.
+    Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
+    return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
+                                      ShAmt);
+  }
+
+  // If the input is a shl/ashr pair with the same constant, then this is a
+  // sign extension from a smaller value.  If we could trust arbitrary bitwidth
+  // integers, we could turn this into a truncate to the smaller bitwidth and
+  // then use a sext for the whole extension.  Since we don't, look deeper and
+  // check for a truncate.  If the source and dest are the same type, eliminate
+  // the trunc and extend and just do shifts.  For example, turn:
+  //   %a = trunc i32 %i to i8
+  //   %b = shl i8 %a, 6
+  //   %c = ashr i8 %b, 6
+  //   %d = sext i8 %c to i32
+  // into:
+  //   %a = shl i32 %i, 30
+  //   %d = ashr i32 %a, 30
+  Value *A = 0;
+  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
+  ConstantInt *BA = 0, *CA = 0;
+  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
+                        m_ConstantInt(CA))) &&
+      BA == CA && A->getType() == CI.getType()) {
+    unsigned MidSize = Src->getType()->getScalarSizeInBits();
+    unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
+    unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
+    Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
+    A = Builder->CreateShl(A, ShAmtV, CI.getName());
+    return BinaryOperator::CreateAShr(A, ShAmtV);
+  }
+
+  return 0;
+}
+
+
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+  bool losesInfo;
+  APFloat F = CFP->getValueAPF();
+  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
+  if (!losesInfo)
+    return ConstantFP::get(CFP->getContext(), F);
+  return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    if (I->getOpcode() == Instruction::FPExt)
+      return LookThroughFPExtensions(I->getOperand(0));
+
+  // If this value is a constant, return the constant in the smallest FP type
+  // that can accurately represent it.  This allows us to turn
+  // (float)((double)X+2.0) into x+2.0f.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
+      return V;  // No constant folding of this.
+    // See if the value can be truncated to float and then reextended.
+    if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+      return V;
+    if (CFP->getType()->isDoubleTy())
+      return V;  // Won't shrink.
+    if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+      return V;
+    // Don't try to shrink to various long double types.
+  }
+
+  return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+  if (Instruction *I = commonCastTransforms(CI))
+    return I;
+
+  // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
+  // smaller than the destination type, we can eliminate the truncate by doing
+  // the add as the smaller type.  This applies to fadd/fsub/fmul/fdiv as well
+  // as many builtins (sqrt, etc).
+  BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+  if (OpI && OpI->hasOneUse()) {
+    switch (OpI->getOpcode()) {
+    default: break;
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FDiv:
+    case Instruction::FRem:
+      const Type *SrcTy = OpI->getType();
+      Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+      Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+      if (LHSTrunc->getType() != SrcTy &&
+          RHSTrunc->getType() != SrcTy) {
+        unsigned DstSize = CI.getType()->getScalarSizeInBits();
+        // If the source types were both smaller than the destination type of
+        // the cast, do this xform.
+        if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+            RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
+          LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
+          RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
+          return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+        }
+      }
+      break;
+    }
+  }
+  return 0;
+}
+
+Instruction *InstCombiner::visitFPExt(CastInst &CI) {
+  return commonCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+  Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+  if (OpI == 0)
+    return commonCastTransforms(FI);
+
+  // fptoui(uitofp(X)) --> X
+  // fptoui(sitofp(X)) --> X
+  // This is safe if the intermediate type has enough bits in its mantissa to
+  // accurately represent all values of X.  For example, do not do this with
+  // i64->float->i64.  This is also safe for the sitofp case, because any
+  // negative 'X' value would cause an undefined result for the fptoui.
+  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
+      OpI->getOperand(0)->getType() == FI.getType() &&
+      (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
+                    OpI->getType()->getFPMantissaWidth())
+    return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+
+  return commonCastTransforms(FI);
+}
+
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+  Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
+  if (OpI == 0)
+    return commonCastTransforms(FI);
+
+  // fptosi(sitofp(X)) --> X
+  // fptosi(uitofp(X)) --> X
+  // This is safe if the intermediate type has enough bits in its mantissa to
+  // accurately represent all values of X.  For example, do not do this with
+  // i64->float->i64.  This is also safe for the uitofp case, because any 'X'
+  // value with the sign bit set would cause an undefined result for the
+  // fptosi anyway.
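+  // For example (illustrative): float carries a 24-bit mantissa, so
+  //   %f = sitofp i16 %x to float
+  //   %r = fptosi float %f to i16
+  // round-trips every i16 value exactly and folds to %x.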
+ if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && + OpI->getOperand(0)->getType() == FI.getType() && + (int)FI.getType()->getScalarSizeInBits() <= + OpI->getType()->getFPMantissaWidth()) + return ReplaceInstUsesWith(FI, OpI->getOperand(0)); + + return commonCastTransforms(FI); +} + +Instruction *InstCombiner::visitUIToFP(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitSIToFP(CastInst &CI) { + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { + // If the source integer type is larger than the intptr_t type for + // this target, do a trunc to the intptr_t type, then inttoptr of it. This + // allows the trunc to be exposed to other transforms. Don't do this for + // extending inttoptr's, because we don't know if the target sign or zero + // extends to pointers. + if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > + TD->getPointerSizeInBits()) { + Value *P = Builder->CreateTrunc(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), "tmp"); + return new IntToPtrInst(P, CI.getType()); + } + + if (Instruction *I = commonCastTransforms(CI)) + return I; + + return 0; +} + +/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) +Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { + Value *Src = CI.getOperand(0); + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { + // If casting the result of a getelementptr instruction with no offset, turn + // this into a cast of the original pointer! + if (GEP->hasAllZeroIndices()) { + // Changing the cast operand is usually not a good idea but it is safe + // here because the pointer operand is being replaced with another + // pointer operand so the opcode doesn't need to change. + Worklist.Add(GEP); + CI.setOperand(0, GEP->getOperand(0)); + return &CI; + } + + // If the GEP has a single use, and the base pointer is a bitcast, and the + // GEP computes a constant offset, see if we can convert these three + // instructions into fewer. This typically happens with unions and other + // non-type-safe code. + if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) && + GEP->hasAllConstantIndices()) { + // We are guaranteed to get a constant from EmitGEPOffset. + ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP)); + int64_t Offset = OffsetV->getSExtValue(); + + // Get the base pointer input of the bitcast, and the type it points to. + Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); + const Type *GEPIdxTy = + cast<PointerType>(OrigBase->getType())->getElementType(); + SmallVector<Value*, 8> NewIndices; + if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { + // If we were able to index down into an element, create the GEP + // and bitcast the result. This eliminates one bitcast, potentially + // two. + Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(OrigBase, + NewIndices.begin(), NewIndices.end()) : + Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); + NGEP->takeName(GEP); + + if (isa<BitCastInst>(CI)) + return new BitCastInst(NGEP, CI.getType()); + assert(isa<PtrToIntInst>(CI)); + return new PtrToIntInst(NGEP, CI.getType()); + } + } + } + + return commonCastTransforms(CI); +} + +Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { + // If the destination integer type is smaller than the intptr_t type for + // this target, do a ptrtoint to intptr_t then do a trunc. 
This allows the
+  // trunc to be exposed to other transforms.  Don't do this for extending
+  // ptrtoint's, because we don't know if the target sign or zero extends its
+  // pointers.
+  if (TD &&
+      CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+    Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
+                                       TD->getIntPtrType(CI.getContext()),
+                                       "tmp");
+    return new TruncInst(P, CI.getType());
+  }
+
+  return commonPointerCastTransforms(CI);
+}
+
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
+  // If the operands are integer typed then apply the integer transforms,
+  // otherwise just apply the common ones.
+  Value *Src = CI.getOperand(0);
+  const Type *SrcTy = Src->getType();
+  const Type *DestTy = CI.getType();
+
+  // Get rid of casts from one type to the same type. These are useless and can
+  // be replaced by the operand.
+  if (DestTy == Src->getType())
+    return ReplaceInstUsesWith(CI, Src);
+
+  if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+    const Type *DstElTy = DstPTy->getElementType();
+    const Type *SrcElTy = SrcPTy->getElementType();
+
+    // If the address spaces don't match, don't eliminate the bitcast, which is
+    // required for changing types.
+    if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+      return 0;
+
+    // If we are casting an alloca to a pointer to a type of the same
+    // size, rewrite the allocation instruction to allocate the "right" type.
+    // There is no need to modify malloc calls because it is their bitcast that
+    // needs to be cleaned up.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+
+    // If the source and destination are pointers, and this cast is equivalent
+    // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
+    // This can enhance SROA and other transforms that want type-safe pointers.
+    Constant *ZeroUInt =
+      Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
+    unsigned NumZeros = 0;
+    while (SrcElTy != DstElTy &&
+           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           SrcElTy->getNumContainedTypes() /* not "{}" */) {
+      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+      ++NumZeros;
+    }
+
+    // If we found a path from the src to dest, create the getelementptr now.
+    if (SrcElTy == DstElTy) {
+      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"",
+                                               ((Instruction*)NULL));
+    }
+  }
+
+  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+    if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) {
+      Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
+      return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
+                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
+    }
+  }
+
+  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+    if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) {
+      Value *Elem =
+        Builder->CreateExtractElement(Src,
+                   Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+      return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+    }
+  }
+
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
+    // Okay, we have (bitcast (shuffle ..)).  Check to see if this is
+    // a bitconvert to a vector with the same # elts.
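+    // For example (an illustrative sketch): with %x and %y both produced by
+    // "bitcast <2 x double> ... to <2 x i64>",
+    //   %s = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 3>
+    //   %b = bitcast <2 x i64> %s to <2 x double>
+    // can shuffle the original <2 x double> values directly and drop both
+    // conversions.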
+ if (SVI->hasOneUse() && isa<VectorType>(DestTy) && + cast<VectorType>(DestTy)->getNumElements() == + SVI->getType()->getNumElements() && + SVI->getType()->getNumElements() == + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) { + BitCastInst *Tmp; + // If either of the operands is a cast from CI.getType(), then + // evaluating the shuffle in the casted destination's type will allow + // us to eliminate at least one cast. + if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && + Tmp->getOperand(0)->getType() == DestTy) || + ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && + Tmp->getOperand(0)->getType() == DestTy)) { + Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); + Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); + // Return a new shuffle vector. Use the same element ID's, as we + // know the vector types match #elts. + return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); + } + } + } + + if (isa<PointerType>(SrcTy)) + return commonPointerCastTransforms(CI); + return commonCastTransforms(CI); +} diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp new file mode 100644 index 0000000..e59406c6 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -0,0 +1,2475 @@ +//===- InstCombineCompares.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitICmp and visitFCmp functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// AddOne - Add one to a ConstantInt +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +} +/// SubOne - Subtract one from a ConstantInt +static Constant *SubOne(ConstantInt *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); +} + +static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { + return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); +} + +static bool HasAddOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { + if (IsSigned) + if (In2->getValue().isNegative()) + return Result->getValue().sgt(In1->getValue()); + else + return Result->getValue().slt(In1->getValue()); + else + return Result->getValue().ult(In1->getValue()); +} + +/// AddWithOverflow - Compute Result = In1+In2, returning true if the result +/// overflowed for this type. 
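+/// For example (illustrative, signed i8): 100 + 100 wraps to -56, so
+/// AddWithOverflow reports overflow while still returning the wrapped
+/// constant in Result.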
+static bool AddWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getAdd(In1, In2); + + if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); + if (HasAddOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasAddOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} + +static bool HasSubOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { + if (IsSigned) + if (In2->getValue().isNegative()) + return Result->getValue().slt(In1->getValue()); + else + return Result->getValue().sgt(In1->getValue()); + else + return Result->getValue().ugt(In1->getValue()); +} + +/// SubWithOverflow - Compute Result = In1-In2, returning true if the result +/// overflowed for this type. +static bool SubWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getSub(In1, In2); + + if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); + if (HasSubOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasSubOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} + +/// isSignBitCheck - Given an exploded icmp instruction, return true if the +/// comparison only checks the sign bit. If it only checks the sign bit, set +/// TrueIfSigned if the result of the comparison is true when the input value is +/// signed. +static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, + bool &TrueIfSigned) { + switch (pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS->isZero(); + case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 + TrueIfSigned = true; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS->isAllOnesValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == high-bit-mask - 1 + TrueIfSigned = true; + return RHS->getValue() == + APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS->getValue().isSignBit(); + default: + return false; + } +} + +// isHighOnes - Return true if the constant is of the form 1+0+. +// This is the same as lowones(~X). +static bool isHighOnes(const ConstantInt *CI) { + return (~CI->getValue() + 1).isPowerOf2(); +} + +/// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a +/// set of known zero and one bits, compute the maximum and minimum values that +/// could have the specified known zero and known one bits, returning them in +/// min/max. 
+static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
+                                                   const APInt& KnownOne,
+                                                   APInt& Min, APInt& Max) {
+  assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+         KnownZero.getBitWidth() == Min.getBitWidth() &&
+         KnownZero.getBitWidth() == Max.getBitWidth() &&
+         "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+  APInt UnknownBits = ~(KnownZero|KnownOne);
+
+  // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
+  // bit if it is unknown.
+  Min = KnownOne;
+  Max = KnownOne|UnknownBits;
+
+  if (UnknownBits.isNegative()) { // Sign bit is unknown
+    Min.set(Min.getBitWidth()-1);
+    Max.clear(Max.getBitWidth()-1);
+  }
+}
+
+// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and
+// a set of known zero and one bits, compute the maximum and minimum values that
+// could have the specified known zero and known one bits, returning them in
+// min/max.
+static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+                                                     const APInt &KnownOne,
+                                                     APInt &Min, APInt &Max) {
+  assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
+         KnownZero.getBitWidth() == Min.getBitWidth() &&
+         KnownZero.getBitWidth() == Max.getBitWidth() &&
+         "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
+  APInt UnknownBits = ~(KnownZero|KnownOne);
+
+  // The minimum value is when the unknown bits are all zeros.
+  Min = KnownOne;
+  // The maximum value is when the unknown bits are all ones.
+  Max = KnownOne|UnknownBits;
+}
+
+
+
+/// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
+///   cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer. Try to simplify
+/// this into some simple computation that does not need the load. For example
+/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
+///
+/// If AndCst is non-null, then the loaded value is masked with that constant
+/// before doing the comparison. This handles cases like "A[i]&4 == 0".
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+                             CmpInst &ICI, ConstantInt *AndCst) {
+  // We need TD information to know the pointer size unless this is inbounds.
+  if (!GEP->isInBounds() && TD == 0) return 0;
+
+  ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+
+  // There are many forms of this optimization we can handle; for now, just do
+  // the simple index into a single-dimensional array.
+  //
+  // Require: GEP GV, 0, i {{, constant indices}}
+  if (GEP->getNumOperands() < 3 ||
+      !isa<ConstantInt>(GEP->getOperand(1)) ||
+      !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+      isa<Constant>(GEP->getOperand(2)))
+    return 0;
+
+  // Check that indices after the variable are constants and in-range for the
+  // type they index. Collect the indices. This is typically for arrays of
+  // structs.
+  SmallVector<unsigned, 4> LaterIndices;
+
+  const Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+  for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+    ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (Idx == 0) return 0;  // Variable index.
+
+    uint64_t IdxVal = Idx->getZExtValue();
+    if ((unsigned)IdxVal != IdxVal) return 0;  // Too large array index.
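+    // (Reviewer note: an illustrative GEP for this loop, with a hypothetical
+    // global @GV of type [8 x { i32, i32 }]:
+    //   getelementptr [8 x { i32, i32 }]* @GV, i32 0, i32 %i, i32 1
+    // Here %i is the single variable index and LaterIndices collects {1}.)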
+ + if (const StructType *STy = dyn_cast<StructType>(EltTy)) + EltTy = STy->getElementType(IdxVal); + else if (const ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { + if (IdxVal >= ATy->getNumElements()) return 0; + EltTy = ATy->getElementType(); + } else { + return 0; // Unknown type. + } + + LaterIndices.push_back(IdxVal); + } + + enum { Overdefined = -3, Undefined = -2 }; + + // Variables for our state machines. + + // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form + // "i == 47 | i == 87", where 47 is the first index the condition is true for, + // and 87 is the second (and last) index. FirstTrueElement is -2 when + // undefined, otherwise set to the first true element. SecondTrueElement is + // -2 when undefined, -3 when overdefined and >= 0 when that index is true. + int FirstTrueElement = Undefined, SecondTrueElement = Undefined; + + // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the + // form "i != 47 & i != 87". Same state transitions as for true elements. + int FirstFalseElement = Undefined, SecondFalseElement = Undefined; + + /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these + /// define a state machine that triggers for ranges of values that the index + /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'. + /// This is -2 when undefined, -3 when overdefined, and otherwise the last + /// index in the range (inclusive). We use -2 for undefined here because we + /// use relative comparisons and don't want 0-1 to match -1. + int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined; + + // MagicBitvector - This is a magic bitvector where we set a bit if the + // comparison is true for element 'i'. If there are 64 elements or less in + // the array, this will fully represent all the comparison results. + uint64_t MagicBitvector = 0; + + + // Scan the array and see if one of our patterns matches. + Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); + for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { + Constant *Elt = Init->getOperand(i); + + // If this is indexing an array of structures, get the structure element. + if (!LaterIndices.empty()) + Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(), + LaterIndices.size()); + + // If the element is masked, handle it. + if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); + + // Find out if the comparison would be true or false for the i'th element. + Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, + CompareRHS, TD); + // If the result is undef for this element, ignore it. + if (isa<UndefValue>(C)) { + // Extend range state machines to cover this element in case there is an + // undef in the middle of the range. + if (TrueRangeEnd == (int)i-1) + TrueRangeEnd = i; + if (FalseRangeEnd == (int)i-1) + FalseRangeEnd = i; + continue; + } + + // If we can't compute the result for any of the elements, we have to give + // up evaluating the entire conditional. + if (!isa<ConstantInt>(C)) return 0; + + // Otherwise, we know if the comparison is true or false for this element, + // update our state machines. + bool IsTrueForElt = !cast<ConstantInt>(C)->isZero(); + + // State machine for single/double/range index comparison. + if (IsTrueForElt) { + // Update the TrueElement state machine. + if (FirstTrueElement == Undefined) + FirstTrueElement = TrueRangeEnd = i; // First true element. + else { + // Update double-compare state machine. 
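+        // (Reviewer note: each state machine only moves Undefined -> index ->
+        // Overdefined; e.g. scanning "aba"[i] == 'a' sets FirstTrueElement=0,
+        // then SecondTrueElement=2, and a third hit would overdefine it.)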
+ if (SecondTrueElement == Undefined) + SecondTrueElement = i; + else + SecondTrueElement = Overdefined; + + // Update range state machine. + if (TrueRangeEnd == (int)i-1) + TrueRangeEnd = i; + else + TrueRangeEnd = Overdefined; + } + } else { + // Update the FalseElement state machine. + if (FirstFalseElement == Undefined) + FirstFalseElement = FalseRangeEnd = i; // First false element. + else { + // Update double-compare state machine. + if (SecondFalseElement == Undefined) + SecondFalseElement = i; + else + SecondFalseElement = Overdefined; + + // Update range state machine. + if (FalseRangeEnd == (int)i-1) + FalseRangeEnd = i; + else + FalseRangeEnd = Overdefined; + } + } + + + // If this element is in range, update our magic bitvector. + if (i < 64 && IsTrueForElt) + MagicBitvector |= 1ULL << i; + + // If all of our states become overdefined, bail out early. Since the + // predicate is expensive, only check it every 8 elements. This is only + // really useful for really huge arrays. + if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && + SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && + FalseRangeEnd == Overdefined) + return 0; + } + + // Now that we've scanned the entire array, emit our new comparison(s). We + // order the state machines in complexity of the generated code. + Value *Idx = GEP->getOperand(2); + + // If the index is larger than the pointer size of the target, truncate the + // index down like the GEP would do implicitly. We don't have to do this for + // an inbounds GEP because the index can't be out of range. + if (!GEP->isInBounds() && + Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) + Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); + + // If the comparison is only true for one or two elements, emit direct + // comparisons. + if (SecondTrueElement != Overdefined) { + // None true -> false. + if (FirstTrueElement == Undefined) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(GEP->getContext())); + + Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); + + // True for one element -> 'i == 47'. + if (SecondTrueElement == Undefined) + return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); + + // True for two elements -> 'i == 47 | i == 72'. + Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); + Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); + Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); + return BinaryOperator::CreateOr(C1, C2); + } + + // If the comparison is only false for one or two elements, emit direct + // comparisons. + if (SecondFalseElement != Overdefined) { + // None false -> true. + if (FirstFalseElement == Undefined) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(GEP->getContext())); + + Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); + + // False for one element -> 'i != 47'. + if (SecondFalseElement == Undefined) + return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); + + // False for two elements -> 'i != 47 & i != 72'. + Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); + Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); + Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); + return BinaryOperator::CreateAnd(C1, C2); + } + + // If the comparison can be replaced with a range comparison for the elements + // where it is true, emit the range check. 
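+  // (Reviewer note: e.g. for the "abbbbc"[i] == 'b' case mentioned above,
+  // which is true exactly for i in [1,4], the code below emits the single
+  // unsigned test (i-1) <u 4.)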
+ if (TrueRangeEnd != Overdefined) { + assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare"); + + // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). + if (FirstTrueElement) { + Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); + Idx = Builder->CreateAdd(Idx, Offs); + } + + Value *End = ConstantInt::get(Idx->getType(), + TrueRangeEnd-FirstTrueElement+1); + return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); + } + + // False range check. + if (FalseRangeEnd != Overdefined) { + assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare"); + // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). + if (FirstFalseElement) { + Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); + Idx = Builder->CreateAdd(Idx, Offs); + } + + Value *End = ConstantInt::get(Idx->getType(), + FalseRangeEnd-FirstFalseElement); + return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); + } + + + // If a 32-bit or 64-bit magic bitvector captures the entire comparison state + // of this load, replace it with computation that does: + // ((magic_cst >> i) & 1) != 0 + if (Init->getNumOperands() <= 32 || + (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { + const Type *Ty; + if (Init->getNumOperands() <= 32) + Ty = Type::getInt32Ty(Init->getContext()); + else + Ty = Type::getInt64Ty(Init->getContext()); + Value *V = Builder->CreateIntCast(Idx, Ty, false); + V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + } + + return 0; +} + + +/// EvaluateGEPOffsetExpression - Return a value that can be used to compare +/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we +/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can +/// be complex, and scales are involved. The above expression would also be +/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). +/// This later form is less amenable to optimization though, and we are allowed +/// to generate the first by knowing that pointer arithmetic doesn't overflow. +/// +/// If we can't emit an optimized form for this expression, this returns null. +/// +static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, + InstCombiner &IC) { + TargetData &TD = *IC.getTargetData(); + gep_type_iterator GTI = gep_type_begin(GEP); + + // Check to see if this gep only has a single variable index. If so, and if + // any constant indices are a multiple of its scale, then we can compute this + // in terms of the scale of the variable index. For example, if the GEP + // implies an offset of "12 + i*4", then we can codegen this as "3 + i", + // because the expression will cross zero at the same point. + unsigned i, e = GEP->getNumOperands(); + int64_t Offset = 0; + for (i = 1; i != e; ++i, ++GTI) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { + // Compute the aggregate offset of constant indices. + if (CI->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size*CI->getSExtValue(); + } + } else { + // Found our variable index. 
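+      // (Reviewer note: e.g. for the "12 + i*4" offset described above, we
+      // stop here with Offset == 12 and the variable index still in hand.)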
+ break; + } + } + + // If there are no variable indices, we must have a constant offset, just + // evaluate it the general way. + if (i == e) return 0; + + Value *VariableIdx = GEP->getOperand(i); + // Determine the scale factor of the variable element. For example, this is + // 4 if the variable index is into an array of i32. + uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); + + // Verify that there are no other variable indices. If so, emit the hard way. + for (++i, ++GTI; i != e; ++i, ++GTI) { + ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!CI) return 0; + + // Compute the aggregate offset of constant indices. + if (CI->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size*CI->getSExtValue(); + } + } + + // Okay, we know we have a single variable index, which must be a + // pointer/array/vector index. If there is no offset, life is simple, return + // the index. + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + if (Offset == 0) { + // Cast to intptrty in case a truncation occurs. If an extension is needed, + // we don't need to bother extending: the extension won't affect where the + // computation crosses zero. + if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) + VariableIdx = new TruncInst(VariableIdx, + TD.getIntPtrType(VariableIdx->getContext()), + VariableIdx->getName(), &I); + return VariableIdx; + } + + // Otherwise, there is an index. The computation we will do will be modulo + // the pointer size, so get it. + uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); + + Offset &= PtrSizeMask; + VariableScale &= PtrSizeMask; + + // To do this transformation, any constant index must be a multiple of the + // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", + // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a + // multiple of the variable scale. + int64_t NewOffs = Offset / (int64_t)VariableScale; + if (Offset != NewOffs*(int64_t)VariableScale) + return 0; + + // Okay, we can do this evaluation. Start by converting the index to intptr. + const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); + if (VariableIdx->getType() != IntPtrTy) + VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, + true /*SExt*/, + VariableIdx->getName(), &I); + Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); + return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); +} + +/// FoldGEPICmp - Fold comparisons between a GEP instruction and something +/// else. At this point we know that the GEP is on the LHS of the comparison. +Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, + Instruction &I) { + // Look through bitcasts. + if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) + RHS = BCI->getOperand(0); + + Value *PtrBase = GEPLHS->getOperand(0); + if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { + // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). + // This transformation (ignoring the base and scales) is valid because we + // know pointers can't overflow since the gep is inbounds. See if we can + // output an optimized form. 
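+    // (Reviewer note: a hypothetical instance of this fold:
+    //   %g = getelementptr inbounds i32* %p, i64 %i
+    //   icmp ult i32* %g, %p   -->   icmp slt i64 %i, 0
+    // because the inbounds offset %i*4 crosses zero exactly where %i does.)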
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); + + // If not, synthesize the offset the hard way. + if (Offset == 0) + Offset = EmitGEPOffset(GEPLHS); + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, + Constant::getNullValue(Offset->getType())); + } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { + // If the base pointers are different, but the indices are the same, just + // compare the base pointer. + if (PtrBase != GEPRHS->getOperand(0)) { + bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); + IndicesTheSame &= GEPLHS->getOperand(0)->getType() == + GEPRHS->getOperand(0)->getType(); + if (IndicesTheSame) + for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) + if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { + IndicesTheSame = false; + break; + } + + // If all indices are the same, just compare the base pointers. + if (IndicesTheSame) + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), + GEPLHS->getOperand(0), GEPRHS->getOperand(0)); + + // Otherwise, the base pointers are different and the indices are + // different, bail out. + return 0; + } + + // If one of the GEPs has all zero indices, recurse. + bool AllZeros = true; + for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) + if (!isa<Constant>(GEPLHS->getOperand(i)) || + !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) { + AllZeros = false; + break; + } + if (AllZeros) + return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), + ICmpInst::getSwappedPredicate(Cond), I); + + // If the other GEP has all zero indices, recurse. + AllZeros = true; + for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) + if (!isa<Constant>(GEPRHS->getOperand(i)) || + !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) { + AllZeros = false; + break; + } + if (AllZeros) + return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); + + if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) { + // If the GEPs only differ by one index, compare it. + unsigned NumDifferences = 0; // Keep track of # differences. + unsigned DiffOperand = 0; // The operand that differs. + for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) + if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { + if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != + GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { + // Irreconcilable differences. + NumDifferences = 2; + break; + } else { + if (NumDifferences++) break; + DiffOperand = i; + } + } + + if (NumDifferences == 0) // SAME GEP? + return ReplaceInstUsesWith(I, // No comparison is needed here. + ConstantInt::get(Type::getInt1Ty(I.getContext()), + ICmpInst::isTrueWhenEqual(Cond))); + + else if (NumDifferences == 1) { + Value *LHSV = GEPLHS->getOperand(DiffOperand); + Value *RHSV = GEPRHS->getOperand(DiffOperand); + // Make sure we do a signed comparison here. + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV); + } + } + + // Only lower this if the icmp is the only user of the GEP or if we expect + // the result to fold to a constant! + if (TD && + (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && + (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { + // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) + Value *L = EmitGEPOffset(GEPLHS); + Value *R = EmitGEPOffset(GEPRHS); + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); + } + } + return 0; +} + +/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". 
+Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+                                            Value *X, ConstantInt *CI,
+                                            ICmpInst::Predicate Pred,
+                                            Value *TheAdd) {
+  // If we have X+0, exit early (simplifying logic below) and let it get folded
+  // elsewhere. icmp X+0, X -> icmp X, X
+  if (CI->isZero()) {
+    bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
+    return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+  }
+
+  // (X+4) == X -> false.
+  if (Pred == ICmpInst::ICMP_EQ)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+  // (X+4) != X -> true.
+  if (Pred == ICmpInst::ICMP_NE)
+    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+
+  // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
+  bool isNUW = false, isNSW = false;
+  if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
+    isNUW = Add->hasNoUnsignedWrap();
+    isNSW = Add->hasNoSignedWrap();
+  }
+
+  // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
+  // so the values can never be equal. Similarly for all other "or equals"
+  // operators.
+
+  // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
+  // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
+  // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
+  if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+    // If this is an NUW add, then this is always false.
+    if (isNUW)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
+
+    Value *R =
+      ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
+    return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
+  }
+
+  // (X+1) >u X        --> X <u (0-1)        --> X != 255
+  // (X+2) >u X        --> X <u (0-2)        --> X <u 254
+  // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
+  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+    // If this is an NUW add, then this is always true.
+    if (isNUW)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+    return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
+  }
+
+  unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
+  ConstantInt *SMax = ConstantInt::get(X->getContext(),
+                                       APInt::getSignedMaxValue(BitWidth));
+
+  // (X+ 1) <s X       --> X >s (MAXSINT-1)          --> X == 127
+  // (X+ 2) <s X       --> X >s (MAXSINT-2)          --> X >s 125
+  // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)    --> X >s 0
+  // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)    --> X >s -1
+  // (X+ -2) <s X      --> X >s (MAXSINT- -2)        --> X >s 126
+  // (X+ -1) <s X      --> X >s (MAXSINT- -1)        --> X != 127
+  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+    // If this is an NSW add, then we have two cases: if the constant is
+    // positive, then this is always false, if negative, this is always true.
+    if (isNSW) {
+      bool isTrue = CI->getValue().isNegative();
+      return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
+    }
+
+    return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
+  }
+
+  // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
+  // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
+  // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
+  // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
+  // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
+  // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128
+
+  // If this is an NSW add, then we have two cases: if the constant is
+  // positive, then this is always true, if negative, this is always false.
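+  // (Reviewer note: e.g. with nsw on i8, "(X+1) >s X" is always true and
+  // "(X+ -1) >s X" is always false, matching the table above.)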
+ if (isNSW) { + bool isTrue = !CI->getValue().isNegative(); + return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); + } + + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); + Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); + return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); +} + +/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS +/// and CmpRHS are both known to be integer constants. +Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS) { + ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); + const APInt &CmpRHSV = CmpRHS->getValue(); + + // FIXME: If the operand types don't match the type of the divide + // then don't attempt this transform. The code below doesn't have the + // logic to deal with a signed divide and an unsigned compare (and + // vice versa). This is because (x /s C1) <s C2 produces different + // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even + // (x /u C1) <u C2. Simply casting the operands and result won't + // work. :( The if statement below tests that condition and bails + // if it finds it. + bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; + if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) + return 0; + if (DivRHS->isZero()) + return 0; // The ProdOV computation fails on divide by zero. + if (DivIsSigned && DivRHS->isAllOnesValue()) + return 0; // The overflow computation also screws up here + if (DivRHS->isOne()) + return 0; // Not worth bothering, and eliminates some funny cases + // with INT_MIN. + + // Compute Prod = CI * DivRHS. We are essentially solving an equation + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); + + // Determine if the product overflows by seeing if the product is + // not equal to the divide. Make sure we do the same kind of divide + // as in the LHS instruction that we're folding. + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; + + // Get the ICmp opcode + ICmpInst::Predicate Pred = ICI.getPredicate(); + + // Figure out the interval that is being checked. For example, a comparison + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // Compute this interval based on the constants involved and the signedness of + // the compare/divide. This computes a half-open interval, keeping track of + // whether either value in the interval overflows. After analysis each + // overflow variable is set to 0 if it's corresponding bound variable is valid + // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. + int LoOverflow = 0, HiOverflow = 0; + Constant *LoBound = 0, *HiBound = 0; + + if (!DivIsSigned) { // udiv + // e.g. X/5 op 3 --> [15, 20) + LoBound = Prod; + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. + if (CmpRHSV == 0) { // (X / pos) op 0 + // Can't overflow. e.g. X/2 op 0 --> [-1, 2) + LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS))); + HiBound = DivRHS; + } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos + LoBound = Prod; // e.g. 
X/5 op 3 --> [15, 20) + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); + } else { // (X / pos) op neg + // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) + HiBound = AddOne(Prod); + LoOverflow = HiOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) { + ConstantInt* DivNeg = + cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; + } + } + } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. + if (CmpRHSV == 0) { // (X / neg) op 0 + // e.g. X/-5 op 0 --> [-4, 5) + LoBound = AddOne(DivRHS); + HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS)); + if (HiBound == DivRHS) { // -INTMIN = INTMIN + HiOverflow = 1; // [INTMIN+1, overflow) + HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + } + } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos + // e.g. X/-5 op 3 --> [-19, -14) + HiBound = AddOne(Prod); + HiOverflow = LoOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) + LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0; + } else { // (X / neg) op neg + LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) + LoOverflow = HiOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true); + } + + // Dividing by a negative swaps the condition. LT <-> GT + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + Value *X = DivI->getOperand(0); + switch (Pred) { + default: llvm_unreachable("Unhandled icmp opcode!"); + case ICmpInst::ICMP_EQ: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); + case ICmpInst::ICMP_NE: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + if (LoOverflow == +1) // Low bound is greater than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + if (LoOverflow == -1) // Low bound is less than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); + return new ICmpInst(Pred, X, LoBound); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + if (HiOverflow == +1) // High bound greater than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); + else if (HiOverflow == -1) // High bound less than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + if (Pred == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); + else + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + } +} + + +/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". 
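+/// For example (reviewer note, illustrative only): the Trunc case below can
+/// rewrite "icmp eq (trunc i32 %x to i8), 42" into a full-width i32 compare
+/// when the bits truncated out of %x are all known.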
+/// +Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, + Instruction *LHSI, + ConstantInt *RHS) { + const APInt &RHSV = RHS->getValue(); + + switch (LHSI->getOpcode()) { + case Instruction::Trunc: + if (ICI.isEquality() && LHSI->hasOneUse()) { + // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all + // of the high bits truncated out of x are known. + unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), + SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); + APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); + APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); + ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); + + // If all the high bits are known, we can do this xform. + if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { + // Pull in the high bits from known-ones set. + APInt NewRHS(RHS->getValue()); + NewRHS.zext(SrcBits); + NewRHS |= KnownOne; + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantInt::get(ICI.getContext(), NewRHS)); + } + } + break; + + case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) + if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { + // If this is a comparison that tests the signbit (X < 0) or (x > -1), + // fold the xor. + if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || + (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { + Value *CompareVal = LHSI->getOperand(0); + + // If the sign bit of the XorCST is not set, there is no change to + // the operation, just stop using the Xor. + if (!XorCST->getValue().isNegative()) { + ICI.setOperand(0, CompareVal); + Worklist.Add(LHSI); + return &ICI; + } + + // Was the old condition true if the operand is positive? + bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; + + // If so, the new one isn't. + isTrueIfPositive ^= true; + + if (isTrueIfPositive) + return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, + SubOne(RHS)); + else + return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, + AddOne(RHS)); + } + + if (LHSI->hasOneUse()) { + // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) + if (!ICI.isEquality() && XorCST->getValue().isSignBit()) { + const APInt &SignBit = XorCST->getValue(); + ICmpInst::Predicate Pred = ICI.isSigned() + ? ICI.getUnsignedPredicate() + : ICI.getSignedPredicate(); + return new ICmpInst(Pred, LHSI->getOperand(0), + ConstantInt::get(ICI.getContext(), + RHSV ^ SignBit)); + } + + // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) + if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { + const APInt &NotSignBit = XorCST->getValue(); + ICmpInst::Predicate Pred = ICI.isSigned() + ? ICI.getUnsignedPredicate() + : ICI.getSignedPredicate(); + Pred = ICI.getSwappedPredicate(Pred); + return new ICmpInst(Pred, LHSI->getOperand(0), + ConstantInt::get(ICI.getContext(), + RHSV ^ NotSignBit)); + } + } + } + break; + case Instruction::And: // (icmp pred (and X, AndCST), RHS) + if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && + LHSI->getOperand(0)->hasOneUse()) { + ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); + + // If the LHS is an AND of a truncating cast, we can widen the + // and/compare to be the input width without changing the value + // produced, eliminating a cast. 
+ if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { + // We can do this transformation if either the AND constant does not + // have its sign bit set or if it is an equality comparison. + // Extending a relational comparison when we're checking the sign + // bit would not work. + if (Cast->hasOneUse() && + (ICI.isEquality() || + (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { + uint32_t BitWidth = + cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth(); + APInt NewCST = AndCST->getValue(); + NewCST.zext(BitWidth); + APInt NewCI = RHSV; + NewCI.zext(BitWidth); + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantInt::get(ICI.getContext(), NewCST), + LHSI->getName()); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantInt::get(ICI.getContext(), NewCI)); + } + } + + // If this is: (X >> C1) & C2 != C3 (where any shift and any compare + // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This + // happens a LOT in code produced by the C front-end, for bitfield + // access. + BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); + if (Shift && !Shift->isShift()) + Shift = 0; + + ConstantInt *ShAmt; + ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; + const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. + const Type *AndTy = AndCST->getType(); // Type of the and. + + // We can fold this as long as we can't shift unknown bits + // into the mask. This can only happen with signed shift + // rights, as they sign-extend. + if (ShAmt) { + bool CanFold = Shift->isLogicalShift(); + if (!CanFold) { + // To test for the bad case of the signed shr, see if any + // of the bits shifted in could be tested after the mask. + uint32_t TyBits = Ty->getPrimitiveSizeInBits(); + int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); + + uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); + if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & + AndCST->getValue()) == 0) + CanFold = true; + } + + if (CanFold) { + Constant *NewCst; + if (Shift->getOpcode() == Instruction::Shl) + NewCst = ConstantExpr::getLShr(RHS, ShAmt); + else + NewCst = ConstantExpr::getShl(RHS, ShAmt); + + // Check to see if we are shifting out any of the bits being + // compared. + if (ConstantExpr::get(Shift->getOpcode(), + NewCst, ShAmt) != RHS) { + // If we shifted bits out, the fold is not going to work out. + // As a special case, check to see if this means that the + // result is always true or false now. + if (ICI.getPredicate() == ICmpInst::ICMP_EQ) + return ReplaceInstUsesWith(ICI, + ConstantInt::getFalse(ICI.getContext())); + if (ICI.getPredicate() == ICmpInst::ICMP_NE) + return ReplaceInstUsesWith(ICI, + ConstantInt::getTrue(ICI.getContext())); + } else { + ICI.setOperand(1, NewCst); + Constant *NewAndCST; + if (Shift->getOpcode() == Instruction::Shl) + NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); + else + NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); + LHSI->setOperand(1, NewAndCST); + LHSI->setOperand(0, Shift->getOperand(0)); + Worklist.Add(Shift); // Shift is dead. + return &ICI; + } + } + } + + // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is + // preferable because it allows the C<<Y expression to be hoisted out + // of a loop if Y is invariant and X is not. + if (Shift && Shift->hasOneUse() && RHSV == 0 && + ICI.isEquality() && !Shift->isArithmeticShift() && + !isa<Constant>(Shift->getOperand(0))) { + // Compute C << Y. 
+ Value *NS; + if (Shift->getOpcode() == Instruction::LShr) { + NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); + } else { + // Insert a logical shift. + NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); + } + + // Compute X & (C << Y). + Value *NewAnd = + Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); + + ICI.setOperand(0, NewAnd); + return &ICI; + } + } + + // Try to optimize things like "A[i]&42 == 0" to index computations. + if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) { + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LI->getOperand(0))) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) { + ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1)); + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C)) + return Res; + } + } + break; + + case Instruction::Or: { + if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) + break; + Value *P, *Q; + if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { + // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 + // -> and (icmp eq P, null), (icmp eq Q, null). + + Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P, + Constant::getNullValue(P->getType())); + Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q, + Constant::getNullValue(Q->getType())); + Instruction *Op; + if (ICI.getPredicate() == ICmpInst::ICMP_EQ) + Op = BinaryOperator::CreateAnd(ICIP, ICIQ); + else + Op = BinaryOperator::CreateOr(ICIP, ICIQ); + return Op; + } + break; + } + + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) + ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!ShAmt) break; + + uint32_t TypeBits = RHSV.getBitWidth(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + if (ShAmt->uge(TypeBits)) + break; + + if (ICI.isEquality()) { + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + Constant *Comp = + ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), + ShAmt); + if (Comp != RHS) {// Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + if (LHSI->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + Constant *Mask = + ConstantInt::get(ICI.getContext(), APInt::getLowBitsSet(TypeBits, + TypeBits-ShAmtVal)); + + Value *And = + Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, + ConstantInt::get(ICI.getContext(), + RHSV.lshr(ShAmtVal))); + } + } + + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. + bool TrueIfSigned = false; + if (LHSI->hasOneUse() && + isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { + // (X << 31) <s 0 --> (X&1) != 0 + Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) << + (TypeBits-ShAmt->getZExtValue()-1)); + Value *And = + Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); + return new ICmpInst(TrueIfSigned ? 
ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, + And, Constant::getNullValue(And->getType())); + } + break; + } + + case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) + case Instruction::AShr: { + // Only handle equality comparisons of shift-by-constant. + ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!ShAmt || !ICI.isEquality()) break; + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = RHSV.getBitWidth(); + if (ShAmt->uge(TypeBits)) + break; + + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = RHSV << ShAmtVal; + if (LHSI->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != RHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // Otherwise, check to see if the bits shifted out are known to be zero. + // If so, we can compare against the unshifted value: + // (X & 4) >> 1 == 2 --> (X & 4) == 4. + if (LHSI->hasOneUse() && + MaskedValueIsZero(LHSI->getOperand(0), + APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getShl(RHS, ShAmt)); + } + + if (LHSI->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = ConstantInt::get(ICI.getContext(), Val); + + Value *And = Builder->CreateAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, + ConstantExpr::getShl(RHS, ShAmt)); + } + break; + } + + case Instruction::SDiv: + case Instruction::UDiv: + // Fold: icmp pred ([us]div X, C1), C2 -> range test + // Fold this div into the comparison, producing a range check. + // Determine, based on the divide type, what the range is being + // checked. If there is an overflow on the low or high side, remember + // it, otherwise compute the range [low, hi) bounding the new value. + // See: InsertRangeTest above for the kinds of replacements possible. 
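+    // (Reviewer note: e.g. "icmp eq (udiv i8 %x, 5), 3" asks whether %x is
+    // in the half-open interval [15, 20), per the bound computation in
+    // FoldICmpDivCst above.)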
+ if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) + if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI), + DivRHS)) + return R; + break; + + case Instruction::Add: + // Fold: icmp pred (add X, C1), C2 + if (!ICI.isEquality()) { + ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!LHSC) break; + const APInt &LHSV = LHSC->getValue(); + + ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) + .subtract(LHSV); + + if (ICI.isSigned()) { + if (CR.getLower().isSignBit()) { + return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), + ConstantInt::get(ICI.getContext(),CR.getUpper())); + } else if (CR.getUpper().isSignBit()) { + return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), + ConstantInt::get(ICI.getContext(),CR.getLower())); + } + } else { + if (CR.getLower().isMinValue()) { + return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), + ConstantInt::get(ICI.getContext(),CR.getUpper())); + } else if (CR.getUpper().isMinValue()) { + return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), + ConstantInt::get(ICI.getContext(),CR.getLower())); + } + } + } + break; + } + + // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. + if (ICI.isEquality()) { + bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + + // If the first operand is (add|sub|and|or|xor|rem) with a constant, and + // the second operand is a constant, simplify a bit. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { + switch (BO->getOpcode()) { + case Instruction::SRem: + // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. + if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ + const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); + if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { + Value *NewRem = + Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), + BO->getName()); + return new ICmpInst(ICI.getPredicate(), NewRem, + Constant::getNullValue(BO->getType())); + } + } + break; + case Instruction::Add: + // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. + if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + ConstantExpr::getSub(RHS, BOp1C)); + } else if (RHSV == 0) { + // Replace ((add A, B) != 0) with (A != -B) if A or B is + // efficiently invertible, or if the add has just this one use. + Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); + + if (Value *NegVal = dyn_castNegVal(BOp1)) + return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); + else if (Value *NegVal = dyn_castNegVal(BOp0)) + return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); + else if (BO->hasOneUse()) { + Value *Neg = Builder->CreateNeg(BOp1); + Neg->takeName(BO); + return new ICmpInst(ICI.getPredicate(), BOp0, Neg); + } + } + break; + case Instruction::Xor: + // For the xor case, we can xor two constants together, eliminating + // the explicit xor. 
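+      // (Reviewer note: e.g. (xor X, 5) == 7 becomes X == 2, since
+      // ConstantExpr::getXor folds 7^5 to 2 at combine time.)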
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + ConstantExpr::getXor(RHS, BOC)); + + // FALLTHROUGH + case Instruction::Sub: + // Replace (([sub|xor] A, B) != 0) with (A != B) + if (RHSV == 0) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + break; + + case Instruction::Or: + // If bits are being or'd in that are not present in the constant we + // are comparing against, then the comparison could never succeed! + if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { + Constant *NotCI = ConstantExpr::getNot(RHS); + if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) + return ReplaceInstUsesWith(ICI, + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + isICMP_NE)); + } + break; + + case Instruction::And: + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + // If bits are being compared against that are and'd out, then the + // comparison can never succeed! + if ((RHSV & ~BOC->getValue()) != 0) + return ReplaceInstUsesWith(ICI, + ConstantInt::get(Type::getInt1Ty(ICI.getContext()), + isICMP_NE)); + + // If we have ((X & C) == C), turn it into ((X & C) != 0). + if (RHS == BOC && RHSV.isPowerOf2()) + return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : + ICmpInst::ICMP_NE, LHSI, + Constant::getNullValue(RHS->getType())); + + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 + if (BOC->getValue().isSignBit()) { + Value *X = BO->getOperand(0); + Constant *Zero = Constant::getNullValue(X->getType()); + ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; + return new ICmpInst(pred, X, Zero); + } + + // ((X & ~7) == 0) --> X < 8 + if (RHSV == 0 && isHighOnes(BOC)) { + Value *X = BO->getOperand(0); + Constant *NegX = ConstantExpr::getNeg(BOC); + ICmpInst::Predicate pred = isICMP_NE ? + ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; + return new ICmpInst(pred, X, NegX); + } + } + default: break; + } + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { + // Handle icmp {eq|ne} <intrinsic>, intcst. + switch (II->getIntrinsicID()) { + case Intrinsic::bswap: + Worklist.Add(II); + ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap())); + return &ICI; + case Intrinsic::ctlz: + case Intrinsic::cttz: + // ctz(A) == bitwidth(a) -> A == 0 and likewise for != + if (RHSV == RHS->getType()->getBitWidth()) { + Worklist.Add(II); + ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0)); + return &ICI; + } + break; + case Intrinsic::ctpop: + // popcount(A) == 0 -> A == 0 and likewise for != + if (RHS->isZero()) { + Worklist.Add(II); + ICI.setOperand(0, II->getOperand(1)); + ICI.setOperand(1, RHS); + return &ICI; + } + break; + default: + break; + } + } + } + return 0; +} + +/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). +/// We only handle extending casts so far. +/// +Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { + const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); + Value *LHSCIOp = LHSCI->getOperand(0); + const Type *SrcTy = LHSCIOp->getType(); + const Type *DestTy = LHSCI->getType(); + Value *RHSCIOp; + + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // integer type is the same size as the pointer type. 
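+  // (Reviewer note: e.g. on a target with 64-bit pointers,
+  //   icmp eq (ptrtoint i8* %p to i64), 0  -->  icmp eq i8* %p, null
+  // by converting the RHS constant with inttoptr instead.)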
+ if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && + TD->getPointerSizeInBits() == + cast<IntegerType>(DestTy)->getBitWidth()) { + Value *RHSOp = 0; + if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { + RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); + } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { + RHSOp = RHSC->getOperand(0); + // If the pointer types don't match, insert a bitcast. + if (LHSCIOp->getType() != RHSOp->getType()) + RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); + } + + if (RHSOp) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); + } + + // The code below only handles extension cast instructions, so far. + // Enforce this. + if (LHSCI->getOpcode() != Instruction::ZExt && + LHSCI->getOpcode() != Instruction::SExt) + return 0; + + bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; + bool isSignedCmp = ICI.isSigned(); + + if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { + // Not an extension from the same type? + RHSCIOp = CI->getOperand(0); + if (RHSCIOp->getType() != LHSCIOp->getType()) + return 0; + + // If the signedness of the two casts doesn't agree (i.e. one is a sext + // and the other is a zext), then we can't handle this. + if (CI->getOpcode() != LHSCI->getOpcode()) + return 0; + + // Deal with equality cases early. + if (ICI.isEquality()) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); + + // A signed comparison of sign extended values simplifies into a + // signed comparison. + if (isSignedCmp && isSignedExt) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); + + // The other three cases all fold into an unsigned comparison. + return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp); + } + + // If we aren't dealing with a constant on the RHS, exit early + ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); + if (!CI) + return 0; + + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DestTy. + Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); + Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), + Res1, DestTy); + + // If the re-extended constant didn't change... + if (Res2 == CI) { + // Deal with equality cases early. + if (ICI.isEquality()) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); + + // A signed comparison of sign extended values simplifies into a + // signed comparison. + if (isSignedExt && isSignedCmp) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); + + // The other three cases all fold into an unsigned comparison. + return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); + } + + // The re-extended constant changed so the constant cannot be represented + // in the shorter type. Consequently, we cannot emit a simple comparison. + + // First, handle some easy cases. We know the result cannot be equal at this + // point so handle the ICI.isEquality() cases + if (ICI.getPredicate() == ICmpInst::ICMP_EQ) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); + if (ICI.getPredicate() == ICmpInst::ICMP_NE) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); + + // Evaluate the comparison for LT (we invert for GT below). LE and GE cases + // should have been folded away previously and not enter in here. + Value *Result; + if (isSignedCmp) { + // We're performing a signed comparison. 
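+    // (Reviewer note: we only reach here when the constant does not fit the
+    // source type; e.g. "icmp slt (sext i8 %x to i32), 1000" is always true,
+    // and the analogous compare against a very negative constant is false.)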
+ if (cast<ConstantInt>(CI)->getValue().isNegative()) + Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false + else + Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true + } else { + // We're performing an unsigned comparison. + if (isSignedExt) { + // We're performing an unsigned comp with a sign extended value. + // This is true if the input is >= 0. [aka >s -1] + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); + } else { + // Unsigned extend & unsigned compare -> always true. + Result = ConstantInt::getTrue(ICI.getContext()); + } + } + + // Finally, return the value computed. + if (ICI.getPredicate() == ICmpInst::ICMP_ULT || + ICI.getPredicate() == ICmpInst::ICMP_SLT) + return ReplaceInstUsesWith(ICI, Result); + + assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || + ICI.getPredicate()==ICmpInst::ICMP_SGT) && + "ICmp should be folded!"); + if (Constant *CI = dyn_cast<Constant>(Result)) + return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); + return BinaryOperator::CreateNot(Result); +} + + + +Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { + bool Changed = false; + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + I.swapOperands(); + Changed = true; + } + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + const Type *Ty = Op0->getType(); + + // icmp's with boolean values can always be turned into bitwise operations + if (Ty == Type::getInt1Ty(I.getContext())) { + switch (I.getPredicate()) { + default: llvm_unreachable("Invalid icmp instruction!"); + case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) + Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); + return BinaryOperator::CreateNot(Xor); + } + case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B + return BinaryOperator::CreateXor(Op0, Op1); + + case ICmpInst::ICMP_UGT: + std::swap(Op0, Op1); // Change icmp ugt -> icmp ult + // FALL THROUGH + case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); + return BinaryOperator::CreateAnd(Not, Op1); + } + case ICmpInst::ICMP_SGT: + std::swap(Op0, Op1); // Change icmp sgt -> icmp slt + // FALL THROUGH + case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); + return BinaryOperator::CreateAnd(Not, Op0); + } + case ICmpInst::ICMP_UGE: + std::swap(Op0, Op1); // Change icmp uge -> icmp ule + // FALL THROUGH + case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); + return BinaryOperator::CreateOr(Not, Op1); + } + case ICmpInst::ICMP_SGE: + std::swap(Op0, Op1); // Change icmp sge -> icmp sle + // FALL THROUGH + case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); + return BinaryOperator::CreateOr(Not, Op0); + } + } + } + + unsigned BitWidth = 0; + if (TD) + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); + else if (Ty->isIntOrIntVector()) + BitWidth = Ty->getScalarSizeInBits(); + + bool isSignBit = false; + + // See if we are doing a comparison with a constant.
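+ // For instance, the canonicalization below rewrites "icmp ule i32 %x, 5" + // as "icmp ult i32 %x, 6", so the later folds only have to reason about + // the strict predicates.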
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + Value *A = 0, *B = 0; + + // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) + if (I.isEquality() && CI->isZero() && + match(Op0, m_Sub(m_Value(A), m_Value(B)))) { + // (icmp cond A B) if cond is equality + return new ICmpInst(I.getPredicate(), A, B); + } + + // If we have an icmp le or icmp ge instruction, turn it into the + // appropriate icmp lt or icmp gt instruction. This allows us to rely on + // them being folded in the code below. The SimplifyICmpInst code has + // already handled the edge cases for us, so we just assert on them. + switch (I.getPredicate()) { + default: break; + case ICmpInst::ICMP_ULE: + assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE + return new ICmpInst(ICmpInst::ICMP_ULT, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()+1)); + case ICmpInst::ICMP_SLE: + assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()+1)); + case ICmpInst::ICMP_UGE: + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + return new ICmpInst(ICmpInst::ICMP_UGT, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()-1)); + case ICmpInst::ICMP_SGE: + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()-1)); + } + + // If this comparison is a normal comparison, it demands all + // bits, if it is a sign bit comparison, it only demands the sign bit. + bool UnusedBit; + isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); + } + + // See if we can fold the comparison based on range information we can get + // by checking whether bits are known to be zero or one in the input. + if (BitWidth != 0) { + APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); + APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); + + if (SimplifyDemandedBits(I.getOperandUse(0), + isSignBit ? APInt::getSignBit(BitWidth) + : APInt::getAllOnesValue(BitWidth), + Op0KnownZero, Op0KnownOne, 0)) + return &I; + if (SimplifyDemandedBits(I.getOperandUse(1), + APInt::getAllOnesValue(BitWidth), + Op1KnownZero, Op1KnownOne, 0)) + return &I; + + // Given the known and unknown bits, compute a range that the LHS could be + // in. Compute the Min, Max and RHS values based on the known bits. For the + // EQ and NE we use unsigned values. + APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); + APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); + if (I.isSigned()) { + ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, + Op0Min, Op0Max); + ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, + Op1Min, Op1Max); + } else { + ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, + Op0Min, Op0Max); + ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, + Op1Min, Op1Max); + } + + // If Min and Max are known to be the same, then SimplifyDemandedBits + // figured out that the LHS is a constant. Just constant fold this now so + // that code below can assume that Min != Max. + if (!isa<Constant>(Op0) && Op0Min == Op0Max) + return new ICmpInst(I.getPredicate(), + ConstantInt::get(I.getContext(), Op0Min), Op1); + if (!isa<Constant>(Op1) && Op1Min == Op1Max) + return new ICmpInst(I.getPredicate(), Op0, + ConstantInt::get(I.getContext(), Op1Min)); + + // Based on the range information we know about the LHS, see if we can + // simplify this comparison. For example, (x&4) < 8 is always true. 
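+ // Likewise, "icmp ne (or i8 %x, 1), 0" is always true here: the known-one + // low bit forces min(LHS) to be at least 1, so the ICMP_NE case below + // folds the compare to true.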
+ switch (I.getPredicate()) { + default: llvm_unreachable("Unknown icmp opcode!"); + case ICmpInst::ICMP_EQ: + if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + break; + case ICmpInst::ICMP_NE: + if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + break; + case ICmpInst::ICMP_ULT: + if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()-1)); + + // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear + if (CI->isMinValue(true)) + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, + Constant::getAllOnesValue(Op0->getType())); + } + break; + case ICmpInst::ICMP_UGT: + if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + + if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(A)-1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()+1)); + + // (x >u 2147483647) -> (x <s 0) -> true if sign bit set + if (CI->isMaxValue(true)) + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, + Constant::getNullValue(Op0->getType())); + } + break; + case ICmpInst::ICMP_SLT: + if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()-1)); + } + break; + case ICmpInst::ICMP_SGT: + if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + + if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(CI->getContext(), CI->getValue()+1)); + } + break; + case ICmpInst::ICMP_SGE: + assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); + if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) + return
ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + break; + case ICmpInst::ICMP_SLE: + assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); + if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + break; + case ICmpInst::ICMP_UGE: + assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); + if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + break; + case ICmpInst::ICMP_ULE: + assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); + if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + break; + } + + // Turn a signed comparison into an unsigned one if both operands + // are known to have the same sign. + if (I.isSigned() && + ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || + (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) + return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); + } + + // Test if the ICmpInst instruction is used exclusively by a select as + // part of a minimum or maximum operation. If so, refrain from doing + // any other folding. This helps out other analyses which understand + // non-obfuscated minimum and maximum idioms, such as ScalarEvolution + // and CodeGen. And in this case, at least one of the comparison + // operands has at least one user besides the compare (the select), + // which would often largely negate the benefit of folding anyway. + if (I.hasOneUse()) + if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin())) + if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || + (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) + return 0; + + // See if we are doing a comparison between a constant and an instruction that + // can be folded into the comparison. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { + // Since the RHS is a ConstantInt (CI), if the left hand side is an + // instruction, see if that instruction also has constants so that the + // instruction can be folded into the icmp + if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) + if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) + return Res; + } + + // Handle icmp with constant (but not simple integer constant) RHS + if (Constant *RHSC = dyn_cast<Constant>(Op1)) { + if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) + switch (LHSI->getOpcode()) { + case Instruction::GetElementPtr: + // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null + if (RHSC->isNullValue() && + cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices()) + return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), + Constant::getNullValue(LHSI->getOperand(0)->getType())); + break; + case Instruction::PHI: + // Only fold icmp into the PHI if the phi and icmp are in the same + // block. 
If in the same block, we're encouraging jump threading. If + // not, we are just pessimizing the code by making an i1 phi. + if (LHSI->getParent() == I.getParent()) + if (Instruction *NV = FoldOpIntoPhi(I, true)) + return NV; + break; + case Instruction::Select: { + // If either operand of the select is a constant, we can fold the + // comparison into the select arms, which will cause one to be + // constant folded and the select turned into a bitwise or. + Value *Op1 = 0, *Op2 = 0; + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + + // We only want to perform this transformation if it will not lead to + // additional code. This is true if either both sides of the select + // fold to a constant (in which case the icmp is replaced with a select + // which will usually simplify) or this is the only user of the + // select (in which case we are trading a select+icmp for a simpler + // select+icmp). + if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { + if (!Op1) + Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); + if (!Op2) + Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), + RHSC, I.getName()); + return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); + } + break; + } + case Instruction::Call: + // If we have (malloc != null), and if the malloc has a single use, we + // can assume it is successful and remove the malloc. + if (isMalloc(LHSI) && LHSI->hasOneUse() && + isa<ConstantPointerNull>(RHSC)) { + // Need to explicitly erase malloc call here, instead of adding it to + // Worklist, because it won't get DCE'd from the Worklist since + // isInstructionTriviallyDead() returns false for function calls. + // It is OK to replace LHSI/MallocCall with Undef because the + // instruction that uses it will be erased via Worklist. + if (extractMallocCall(LHSI)) { + LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); + EraseInstFromFunction(*LHSI); + return ReplaceInstUsesWith(I, + ConstantInt::get(Type::getInt1Ty(I.getContext()), + !I.isTrueWhenEqual())); + } + if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) + if (MallocCall->hasOneUse()) { + MallocCall->replaceAllUsesWith( + UndefValue::get(MallocCall->getType())); + EraseInstFromFunction(*MallocCall); + Worklist.Add(LHSI); // The malloc's bitcast use. + return ReplaceInstUsesWith(I, + ConstantInt::get(Type::getInt1Ty(I.getContext()), + !I.isTrueWhenEqual())); + } + } + break; + case Instruction::IntToPtr: + // icmp pred inttoptr(X), null -> icmp pred X, 0 + if (RHSC->isNullValue() && TD && + TD->getIntPtrType(RHSC->getContext()) == + LHSI->getOperand(0)->getType()) + return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), + Constant::getNullValue(LHSI->getOperand(0)->getType())); + break; + + case Instruction::Load: + // Try to optimize things like "A[i] > 4" to index computations. + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !cast<LoadInst>(LHSI)->isVolatile()) + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) + return Res; + } + break; + } + } + + // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. 
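+ // A typical case handled by FoldGEPICmp (defined elsewhere) is a compare + // of two GEPs off the same base pointer, e.g. reducing + //   icmp ult (gep inbounds %base, i64 %i), (gep inbounds %base, i64 %j) + // to a compare of the indices themselves.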
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) + if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) + return NI; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) + if (Instruction *NI = FoldGEPICmp(GEP, Op0, + ICmpInst::getSwappedPredicate(I.getPredicate()), I)) + return NI; + + // Test to see if the operands of the icmp are casted versions of other + // values. If the ptr->ptr cast can be stripped off both arguments, we do so + // now. + if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { + if (isa<PointerType>(Op0->getType()) && + (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { + // We keep moving the cast from the left operand over to the right + // operand, where it can often be eliminated completely. + Op0 = CI->getOperand(0); + + // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast + // so eliminate it as well. + if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1)) + Op1 = CI2->getOperand(0); + + // If Op1 is a constant, we can fold the cast into the constant. + if (Op0->getType() != Op1->getType()) { + if (Constant *Op1C = dyn_cast<Constant>(Op1)) { + Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); + } else { + // Otherwise, cast the RHS right before the icmp + Op1 = Builder->CreateBitCast(Op1, Op0->getType()); + } + } + return new ICmpInst(I.getPredicate(), Op0, Op1); + } + } + + if (isa<CastInst>(Op0)) { + // Handle the special case of: icmp (cast bool to X), <cst> + // This comes up when you have code like + // int X = A < B; + // if (X) ... + // For generality, we handle any zero-extension of any operand comparison + // with a constant or another cast from the same type. + if (isa<Constant>(Op1) || isa<CastInst>(Op1)) + if (Instruction *R = visitICmpInstWithCastAndCast(I)) + return R; + } + + // See if it's the same type of instruction on the left and right. + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { + if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { + if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && + Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { + switch (Op0I->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b + return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), + Op1I->getOperand(0)); + // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { + if (CI->getValue().isSignBit()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + return new ICmpInst(Pred, Op0I->getOperand(0), + Op1I->getOperand(0)); + } + + if (CI->getValue().isMaxSignedValue()) { + ICmpInst::Predicate Pred = I.isSigned() + ? I.getUnsignedPredicate() + : I.getSignedPredicate(); + Pred = I.getSwappedPredicate(Pred); + return new ICmpInst(Pred, Op0I->getOperand(0), + Op1I->getOperand(0)); + } + } + break; + case Instruction::Mul: + if (!I.isEquality()) + break; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { + // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask + // Mask = -1 >> count-trailing-zeros(Cst). 
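+ // E.g. for i8 and Cst = 4 (two trailing zeros), Mask is 63, so + //   (a * 4) == (b * 4)  -->  (a & 63) == (b & 63) + // because multiplying by 4 discards the top two bits of each operand.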
+ if (!CI->isZero() && !CI->isOne()) { + const APInt &AP = CI->getValue(); + ConstantInt *Mask = ConstantInt::get(I.getContext(), + APInt::getLowBitsSet(AP.getBitWidth(), + AP.getBitWidth() - + AP.countTrailingZeros())); + Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); + return new ICmpInst(I.getPredicate(), And1, And2); + } + } + break; + } + } + } + } + + // ~x < ~y --> y < x + { Value *A, *B; + if (match(Op0, m_Not(m_Value(A))) && + match(Op1, m_Not(m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, A); + } + + if (I.isEquality()) { + Value *A, *B, *C, *D; + + // -x == -y --> x == y + if (match(Op0, m_Neg(m_Value(A))) && + match(Op1, m_Neg(m_Value(B)))) + return new ICmpInst(I.getPredicate(), A, B); + + if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { + if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 + Value *OtherVal = A == Op1 ? B : A; + return new ICmpInst(I.getPredicate(), OtherVal, + Constant::getNullValue(A->getType())); + } + + if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { + // A^c1 == C^c2 --> A == C^(c1^c2) + ConstantInt *C1, *C2; + if (match(B, m_ConstantInt(C1)) && + match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { + Constant *NC = ConstantInt::get(I.getContext(), + C1->getValue() ^ C2->getValue()); + Value *Xor = Builder->CreateXor(C, NC, "tmp"); + return new ICmpInst(I.getPredicate(), A, Xor); + } + + // A^B == A^D -> B == D + if (A == C) return new ICmpInst(I.getPredicate(), B, D); + if (A == D) return new ICmpInst(I.getPredicate(), B, C); + if (B == C) return new ICmpInst(I.getPredicate(), A, D); + if (B == D) return new ICmpInst(I.getPredicate(), A, C); + } + } + + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) { + // A == (A^B) -> B == 0 + Value *OtherVal = A == Op0 ? B : A; + return new ICmpInst(I.getPredicate(), OtherVal, + Constant::getNullValue(A->getType())); + } + + // (A-B) == A -> B == 0 + if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, + Constant::getNullValue(B->getType())); + + // A == (A-B) -> B == 0 + if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) + return new ICmpInst(I.getPredicate(), B, + Constant::getNullValue(B->getType())); + + // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 + if (Op0->hasOneUse() && Op1->hasOneUse() && + match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_And(m_Value(C), m_Value(D)))) { + Value *X = 0, *Y = 0, *Z = 0; + + if (A == C) { + X = B; Y = D; Z = A; + } else if (A == D) { + X = B; Y = C; Z = A; + } else if (B == C) { + X = A; Y = D; Z = B; + } else if (B == D) { + X = A; Y = C; Z = B; + } + + if (X) { // Build (X^Y) & Z + Op1 = Builder->CreateXor(X, Y, "tmp"); + Op1 = Builder->CreateAnd(Op1, Z, "tmp"); + I.setOperand(0, Op1); + I.setOperand(1, Constant::getNullValue(Op1->getType())); + return &I; + } + } + } + + { + Value *X; ConstantInt *Cst; + // icmp X+Cst, X + if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) + return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0); + + // icmp X, X+Cst + if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) + return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1); + } + return Changed ? &I : 0; +} + + + + + + +/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. 
+/// +Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, + Instruction *LHSI, + Constant *RHSC) { + if (!isa<ConstantFP>(RHSC)) return 0; + const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); + + // Get the width of the mantissa. We don't want to hack on conversions that + // might lose information from the integer, e.g. "i64 -> float" + int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); + if (MantissaWidth == -1) return 0; // Unknown. + + // Check to see that the input is converted from an integer type that is small + // enough that the conversion preserves all bits. TODO: check here for "known" + // sign bits. This would allow us to handle (fptosi (x >>s 62) to float) if x + // is i64, for example. + unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); + + // If this is a uitofp instruction, we need an extra bit to hold the sign. + bool LHSUnsigned = isa<UIToFPInst>(LHSI); + if (LHSUnsigned) + ++InputSize; + + // If the conversion would lose info, don't hack on this. + if ((int)InputSize > MantissaWidth) + return 0; + + // Otherwise, we can potentially simplify the comparison. We know that it + // will always come through as an integer value and we know the constant is + // not a NaN (it would have been previously simplified). + assert(!RHS.isNaN() && "NaN comparison not already folded!"); + + ICmpInst::Predicate Pred; + switch (I.getPredicate()) { + default: llvm_unreachable("Unexpected predicate!"); + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_OEQ: + Pred = ICmpInst::ICMP_EQ; + break; + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_OGT: + Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; + break; + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_OGE: + Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; + break; + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_OLT: + Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; + break; + case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_OLE: + Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; + break; + case FCmpInst::FCMP_UNE: + case FCmpInst::FCMP_ONE: + Pred = ICmpInst::ICMP_NE; + break; + case FCmpInst::FCMP_ORD: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + case FCmpInst::FCMP_UNO: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + } + + const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); + + // Now we know that the APFloat is a normal number, zero or inf. + + // See if the FP constant is too large for the integer. For example, + // comparing an i8 to 300.0. + unsigned IntWidth = IntTy->getScalarSizeInBits(); + + if (!LHSUnsigned) { + // If the RHS value is > SignedMax, fold the comparison. This handles +INF + // and large values. + APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false); + SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || + Pred == ICmpInst::ICMP_SLE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + } + } else { + // If the RHS value is > UnsignedMax, fold the comparison. This handles + // +INF and large values.
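+ // Continuing the i8 example above: UMax here is 255.0 < 300.0, so an + // unsigned compare of an i8 against 300.0 folds to a constant.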
+ APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false); + UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, + APFloat::rmNearestTiesToEven); + if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || + Pred == ICmpInst::ICMP_ULE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + } + } + + if (!LHSUnsigned) { + // See if the RHS value is < SignedMin. + APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); + SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || + Pred == ICmpInst::ICMP_SGE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + } + } + + // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or + // [0, UMAX], but it may still be fractional. See if it is fractional by + // casting the FP value to the integer value and back, checking for equality. + // Don't do this for zero, because -0.0 is not fractional. + Constant *RHSInt = LHSUnsigned + ? ConstantExpr::getFPToUI(RHSC, IntTy) + : ConstantExpr::getFPToSI(RHSC, IntTy); + if (!RHS.isZero()) { + bool Equal = LHSUnsigned + ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC + : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; + if (!Equal) { + // If we had a comparison against a fractional value, we have to adjust + // the compare predicate and sometimes the value. RHSC is rounded towards + // zero at this point. 
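+ // For example, with RHS = 4.4 the rounded constant is 4, and + // "(float)int < 4.4" becomes "int <= 4" because no integer lies in the + // interval (4, 4.4).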
+ switch (Pred) { + default: llvm_unreachable("Unexpected integer comparison!"); + case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + case ICmpInst::ICMP_ULE: + // (float)int <= 4.4 --> int <= 4 + // (float)int <= -4.4 --> false + if (RHS.isNegative()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + break; + case ICmpInst::ICMP_SLE: + // (float)int <= 4.4 --> int <= 4 + // (float)int <= -4.4 --> int < -4 + if (RHS.isNegative()) + Pred = ICmpInst::ICMP_SLT; + break; + case ICmpInst::ICMP_ULT: + // (float)int < -4.4 --> false + // (float)int < 4.4 --> int <= 4 + if (RHS.isNegative()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + Pred = ICmpInst::ICMP_ULE; + break; + case ICmpInst::ICMP_SLT: + // (float)int < -4.4 --> int < -4 + // (float)int < 4.4 --> int <= 4 + if (!RHS.isNegative()) + Pred = ICmpInst::ICMP_SLE; + break; + case ICmpInst::ICMP_UGT: + // (float)int > 4.4 --> int > 4 + // (float)int > -4.4 --> true + if (RHS.isNegative()) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + break; + case ICmpInst::ICMP_SGT: + // (float)int > 4.4 --> int > 4 + // (float)int > -4.4 --> int >= -4 + if (RHS.isNegative()) + Pred = ICmpInst::ICMP_SGE; + break; + case ICmpInst::ICMP_UGE: + // (float)int >= -4.4 --> true + // (float)int >= 4.4 --> int > 4 + if (!RHS.isNegative()) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + Pred = ICmpInst::ICMP_UGT; + break; + case ICmpInst::ICMP_SGE: + // (float)int >= -4.4 --> int >= -4 + // (float)int >= 4.4 --> int > 4 + if (!RHS.isNegative()) + Pred = ICmpInst::ICMP_SGT; + break; + } + } + } + + // Lower this FP comparison into an appropriate integer version of the + // comparison. + return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); +} + +Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { + bool Changed = false; + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + I.swapOperands(); + Changed = true; + } + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Simplify 'fcmp pred X, X' + if (Op0 == Op1) { + switch (I.getPredicate()) { + default: llvm_unreachable("Unknown predicate!"); + case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) + case FCmpInst::FCMP_ULT: // True if unordered or less than + case FCmpInst::FCMP_UGT: // True if unordered or greater than + case FCmpInst::FCMP_UNE: // True if unordered or not equal + // Canonicalize these to be 'fcmp uno %X, 0.0'. + I.setPredicate(FCmpInst::FCMP_UNO); + I.setOperand(1, Constant::getNullValue(Op0->getType())); + return &I; + + case FCmpInst::FCMP_ORD: // True if ordered (no nans) + case FCmpInst::FCMP_OEQ: // True if ordered and equal + case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal + case FCmpInst::FCMP_OLE: // True if ordered and less than or equal + // Canonicalize these to be 'fcmp ord %X, 0.0'. 
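+ // (Each of these is true exactly when %X is not a NaN, which is what + // 'fcmp ord %X, 0.0' tests.)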
+ I.setPredicate(FCmpInst::FCMP_ORD); + I.setOperand(1, Constant::getNullValue(Op0->getType())); + return &I; + } + } + + // Handle fcmp with constant RHS + if (Constant *RHSC = dyn_cast<Constant>(Op1)) { + if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) + switch (LHSI->getOpcode()) { + case Instruction::PHI: + // Only fold fcmp into the PHI if the phi and fcmp are in the same + // block. If in the same block, we're encouraging jump threading. If + // not, we are just pessimizing the code by making an i1 phi. + if (LHSI->getParent() == I.getParent()) + if (Instruction *NV = FoldOpIntoPhi(I, true)) + return NV; + break; + case Instruction::SIToFP: + case Instruction::UIToFP: + if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) + return NV; + break; + case Instruction::Select: { + // If either operand of the select is a constant, we can fold the + // comparison into the select arms, which will cause one to be + // constant folded and the select turned into a bitwise or. + Value *Op1 = 0, *Op2 = 0; + if (LHSI->hasOneUse()) { + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { + // Fold the known value into the constant operand. + Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); + // Insert a new FCmp of the other select operand. + Op2 = Builder->CreateFCmp(I.getPredicate(), + LHSI->getOperand(2), RHSC, I.getName()); + } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { + // Fold the known value into the constant operand. + Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); + // Insert a new FCmp of the other select operand. + Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); + } + } + + if (Op1) + return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); + break; + } + case Instruction::Load: + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !cast<LoadInst>(LHSI)->isVolatile()) + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) + return Res; + } + break; + } + } + + return Changed ? &I : 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp new file mode 100644 index 0000000..6c0ecc9 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -0,0 +1,613 @@ +//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for load, store and alloca. 
+// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); + +Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { + // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 + if (AI.isArrayAllocation()) { // Check C != 1 + if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { + const Type *NewTy = + ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); + assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); + AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); + New->setAlignment(AI.getAlignment()); + + // Scan to the end of the allocation instructions, to skip over a block of + // allocas if possible... also skip interleaved debug info + // + BasicBlock::iterator It = New; + while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; + + // Now that It points to the first non-allocation instruction in the block, + // insert our getelementptr instruction... + // + Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(AI.getContext())); + Value *Idx[2]; + Idx[0] = NullIdx; + Idx[1] = NullIdx; + Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, + New->getName()+".sub", It); + + // Now make everything use the getelementptr instead of the original + // allocation. + return ReplaceInstUsesWith(AI, V); + } else if (isa<UndefValue>(AI.getArraySize())) { + return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); + } + } + + if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { + // If alloca'ing a zero byte object, replace the alloca with a null pointer. + // Note that we only do this for alloca's, because malloc should allocate + // and return a unique pointer, even for a zero byte allocation. + if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) + return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); + + // If the alignment is 0 (unspecified), assign it the preferred alignment. + if (AI.getAlignment() == 0) + AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); + } + + return 0; +} + + +/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible. +static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, + const TargetData *TD) { + User *CI = cast<User>(LI.getOperand(0)); + Value *CastOp = CI->getOperand(0); + + const PointerType *DestTy = cast<PointerType>(CI->getType()); + const Type *DestPTy = DestTy->getElementType(); + if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { + + // If the address spaces don't match, don't eliminate the cast. + if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) + return 0; + + const Type *SrcPTy = SrcTy->getElementType(); + + if (DestPTy->isInteger() || isa<PointerType>(DestPTy) || + isa<VectorType>(DestPTy)) { + // If the source is an array, the code below will not succeed. Check to + // see if a trivial 'gep P, 0, 0' will help matters. Only do this for + // constants.
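+ // E.g. a load through "bitcast ([4 x i32]* @G to i32*)" can only lose the + // cast if we first rewrite the source as "gep @G, 0, 0", whose element + // type is i32.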
+ if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) + if (Constant *CSrc = dyn_cast<Constant>(CastOp)) + if (ASrcTy->getNumElements() != 0) { + Value *Idxs[2]; + Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); + Idxs[1] = Idxs[0]; + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); + SrcTy = cast<PointerType>(CastOp->getType()); + SrcPTy = SrcTy->getElementType(); + } + + if (IC.getTargetData() && + (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || + isa<VectorType>(SrcPTy)) && + // Do not allow turning this into a load of an integer, which is then + // casted to a pointer; this pessimizes pointer analysis a lot. + (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) && + IC.getTargetData()->getTypeSizeInBits(SrcPTy) == + IC.getTargetData()->getTypeSizeInBits(DestPTy)) { + + // Okay, we are casting from one integer or pointer type to another of + // the same size. Instead of casting the pointer before the load, cast + // the result of the loaded value. + Value *NewLoad = + IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); + // Now cast the result of the load. + return new BitCastInst(NewLoad, LI.getType()); + } + } + return 0; +} + +Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { + Value *Op = LI.getOperand(0); + + // Attempt to improve the alignment. + if (TD) { + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); + if (KnownAlign > + (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : + LI.getAlignment())) + LI.setAlignment(KnownAlign); + } + + // load (cast X) --> cast (load X) iff safe. + if (isa<CastInst>(Op)) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + + // None of the following transforms are legal for volatile loads. + if (LI.isVolatile()) return 0; + + // Do really simple store-to-load forwarding and load CSE, to catch cases + // where there are several consecutive memory accesses to the same location, + // separated by a few arithmetic operations. + BasicBlock::iterator BBI = &LI; + if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6)) + return ReplaceInstUsesWith(LI, AvailableVal); + + // load(gep null, ...) -> unreachable + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { + const Value *GEPI0 = GEPI->getOperand(0); + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) { + // Insert a new store to null instruction before the load to indicate + // that this code is not reachable. We do this instead of inserting + // an unreachable instruction directly because we cannot modify the + // CFG. + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + } + } + + // load null/undef -> unreachable + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa<UndefValue>(Op) || + (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { + // Insert a new store to null instruction before the load to indicate that + // this code is not reachable. We do this instead of inserting an + // unreachable instruction directly because we cannot modify the CFG.
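+ // So "%v = load i32* null" becomes a store of undef to null (which + // SimplifyCFG later turns into unreachable), and every use of %v is + // replaced with undef.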
+ new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); + } + + // Instcombine load (constantexpr_cast global) -> cast (load global) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) + if (CE->isCast()) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + + if (Op->hasOneUse()) { + // Change select and PHI nodes to select values instead of addresses: this + // helps alias analysis out a lot, allows many other simplifications, and + // exposes redundancy in the code. + // + // Note that we cannot do the transformation unless we know that the + // introduced loads cannot trap! Something like this is valid as long as + // the condition is always false: load (select bool %C, int* null, int* %G), + // but it would not be valid if we transformed it to load from null + // unconditionally. + // + if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { + // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { + Value *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + Value *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); + return SelectInst::Create(SI->getCondition(), V1, V2); + } + + // load (select (cond, null, P)) -> load P + if (Constant *C = dyn_cast<Constant>(SI->getOperand(1))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(2)); + return &LI; + } + + // load (select (cond, P, null)) -> load P + if (Constant *C = dyn_cast<Constant>(SI->getOperand(2))) + if (C->isNullValue()) { + LI.setOperand(0, SI->getOperand(1)); + return &LI; + } + } + } + return 0; +} + +/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P +/// when possible. This makes it generally easy to do alias analysis and/or +/// SROA/mem2reg of the memory object. +static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { + User *CI = cast<User>(SI.getOperand(1)); + Value *CastOp = CI->getOperand(0); + + const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); + const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); + if (SrcTy == 0) return 0; + + const Type *SrcPTy = SrcTy->getElementType(); + + if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy)) + return 0; + + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" + /// to its first element. This allows us to handle things like: + /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) + /// on 32-bit hosts. + SmallVector<Value*, 4> NewGEPIndices; + + // If the source is an array, the code below will not succeed. Check to + // see if a trivial 'gep P, 0, 0' will help matters. Only do this for + // constants. + if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) { + // Index through pointer.
+ Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); + NewGEPIndices.push_back(Zero); + + while (1) { + if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) { + if (!STy->getNumElements()) /* Struct can be empty {} */ + break; + NewGEPIndices.push_back(Zero); + SrcPTy = STy->getElementType(0); + } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { + NewGEPIndices.push_back(Zero); + SrcPTy = ATy->getElementType(); + } else { + break; + } + } + + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + } + + if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) + return 0; + + // If the pointers point into different address spaces or if they point to + // values with different sizes, we can't do the transformation. + if (!IC.getTargetData() || + SrcTy->getAddressSpace() != + cast<PointerType>(CI->getType())->getAddressSpace() || + IC.getTargetData()->getTypeSizeInBits(SrcPTy) != + IC.getTargetData()->getTypeSizeInBits(DestPTy)) + return 0; + + // Okay, we are casting from one integer or pointer type to another of + // the same size. Instead of casting the pointer before + // the store, cast the value to be stored. + Value *NewCast; + Value *SIOp0 = SI.getOperand(0); + Instruction::CastOps opcode = Instruction::BitCast; + const Type* CastSrcTy = SIOp0->getType(); + const Type* CastDstTy = SrcPTy; + if (isa<PointerType>(CastDstTy)) { + if (CastSrcTy->isInteger()) + opcode = Instruction::IntToPtr; + } else if (isa<IntegerType>(CastDstTy)) { + if (isa<PointerType>(SIOp0->getType())) + opcode = Instruction::PtrToInt; + } + + // SIOp0 is a pointer to aggregate and this is a store to the first field, + // emit a GEP to index into its first field. + if (!NewGEPIndices.empty()) + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), + NewGEPIndices.end()); + + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, + SIOp0->getName()+".c"); + return new StoreInst(NewCast, CastOp); +} + +/// equivalentAddressValues - Test if A and B will obviously have the same +/// value. This includes recognizing that %t0 and %t1 will have the same +/// value in code like this: +/// %t0 = getelementptr \@a, 0, 3 +/// store i32 0, i32* %t0 +/// %t1 = getelementptr \@a, 0, 3 +/// %t2 = load i32* %t1 +/// +static bool equivalentAddressValues(Value *A, Value *B) { + // Test if the values are trivially equivalent. + if (A == B) return true; + + // Test if the values come from identical arithmetic instructions. + // This uses isIdenticalToWhenDefined instead of isIdenticalTo because + // it's only used to compare two uses within the same basic block, which + // means that they'll always either have the same value or one of them + // will have an undefined value. + if (isa<BinaryOperator>(A) || + isa<CastInst>(A) || + isa<PHINode>(A) || + isa<GetElementPtrInst>(A)) + if (Instruction *BI = dyn_cast<Instruction>(B)) + if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) + return true; + + // Otherwise they may not be equivalent. + return false; +} + +// If this instruction has two uses, one of which is a llvm.dbg.declare, +// return the llvm.dbg.declare.
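+// The typical pattern is an alloca whose only uses are one store and one +// llvm.dbg.declare, possibly reached through a single-use bitcast; +// visitStoreInst below uses this to delete both the store and the declare.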
+DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { + if (!V->hasNUses(2)) + return 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI)) + return DI; + if (isa<BitCastInst>(UI) && UI->hasOneUse()) { + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin())) + return DI; + } + } + return 0; +} + +Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { + Value *Val = SI.getOperand(0); + Value *Ptr = SI.getOperand(1); + + // If the RHS is an alloca with a single use, zapify the store, making the + // alloca dead. + // If the RHS is an alloca with two uses, the other one being a + // llvm.dbg.declare, zapify the store and the declare, making the + // alloca dead. We must do this to prevent declares from affecting + // codegen. + if (!SI.isVolatile()) { + if (Ptr->hasOneUse()) { + if (isa<AllocaInst>(Ptr)) + return EraseInstFromFunction(SI); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { + if (isa<AllocaInst>(GEP->getOperand(0))) { + if (GEP->getOperand(0)->hasOneUse()) + return EraseInstFromFunction(SI); + if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { + EraseInstFromFunction(*DI); + return EraseInstFromFunction(SI); + } + } + } + } + if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { + EraseInstFromFunction(*DI); + return EraseInstFromFunction(SI); + } + } + + // Attempt to improve the alignment. + if (TD) { + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); + if (KnownAlign > + (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : + SI.getAlignment())) + SI.setAlignment(KnownAlign); + } + + // Do really simple DSE, to catch cases where there are several consecutive + // stores to the same location, separated by a few arithmetic operations. This + // situation often occurs with bitfield accesses. + BasicBlock::iterator BBI = &SI; + for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; + --ScanInsts) { + --BBI; + // Don't count debug info directives, lest they affect codegen, + // and skip pointer-to-pointer bitcasts, which are NOPs. + // It is necessary for correctness to skip those that feed into a + // llvm.dbg.declare, as these are not present when debugging is off. + if (isa<DbgInfoIntrinsic>(BBI) || + (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { + ScanInsts++; + continue; + } + + if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { + // Prev store isn't volatile, and stores to the same location? + if (!PrevSI->isVolatile() && equivalentAddressValues(PrevSI->getOperand(1), + SI.getOperand(1))) { + ++NumDeadStore; + ++BBI; + EraseInstFromFunction(*PrevSI); + continue; + } + break; + } + + // If this is a load, we have to stop. However, if the loaded value is the + // value being stored and was loaded from the same pointer, then *this* + // store is dead (X = load P; store X -> P). + if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && + !SI.isVolatile()) + return EraseInstFromFunction(SI); + + // Otherwise, this is a load from some other location. Stores before it + // may not be dead. + break; + } + + // Don't skip over loads or things that can modify memory. + if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) + break; + } + + + if (SI.isVolatile()) return 0; // Don't hack volatile stores.
+ + // store X, null -> turns into 'unreachable' in SimplifyCFG + if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { + if (!isa<UndefValue>(Val)) { + SI.setOperand(0, UndefValue::get(Val->getType())); + if (Instruction *U = dyn_cast<Instruction>(Val)) + Worklist.Add(U); // Dropped a use. + } + return 0; // Do not modify these! + } + + // store undef, Ptr -> noop + if (isa<UndefValue>(Val)) + return EraseInstFromFunction(SI); + + // If the pointer destination is a cast, see if we can fold the cast into the + // source instead. + if (isa<CastInst>(Ptr)) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->isCast()) + if (Instruction *Res = InstCombineStoreToCast(*this, SI)) + return Res; + + + // If this store is the last instruction in the basic block (possibly + // excepting debug info instructions and the pointer bitcasts that feed + // into them), and if the block ends with an unconditional branch, try + // to move it to the successor block. + BBI = &SI; + do { + ++BBI; + } while (isa<DbgInfoIntrinsic>(BBI) || + (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))); + if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) + if (BI->isUnconditional()) + if (SimplifyStoreAtEndOfBlock(SI)) + return 0; // xform done! + + return 0; +} + +/// SimplifyStoreAtEndOfBlock - Turn things like: +/// if () { *P = v1; } else { *P = v2 } +/// into a phi node with a store in the successor. +/// +/// Simplify things like: +/// *P = v1; if () { *P = v2; } +/// into a phi node with a store in the successor. +/// +bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { + BasicBlock *StoreBB = SI.getParent(); + + // Check to see if the successor block has exactly two incoming edges. If + // so, see if the other predecessor contains a store to the same location. + // If so, insert a PHI node (if needed) and move the stores down. + BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); + + // Determine whether Dest has exactly two predecessors and, if so, compute + // the other predecessor. + pred_iterator PI = pred_begin(DestBB); + BasicBlock *OtherBB = 0; + if (*PI != StoreBB) + OtherBB = *PI; + ++PI; + if (PI == pred_end(DestBB)) + return false; + + if (*PI != StoreBB) { + if (OtherBB) + return false; + OtherBB = *PI; + } + if (++PI != pred_end(DestBB)) + return false; + + // Bail out if all the relevant blocks aren't distinct (this can happen, + // for example, if SI is in an infinite loop). + if (StoreBB == DestBB || OtherBB == DestBB) + return false; + + // Verify that the other block ends in a branch and is not otherwise empty. + BasicBlock::iterator BBI = OtherBB->getTerminator(); + BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); + if (!OtherBr || BBI == OtherBB->begin()) + return false; + + // If the other block ends in an unconditional branch, check for the 'if then + // else' case: there must be an instruction before the branch. + StoreInst *OtherStore = 0; + if (OtherBr->isUnconditional()) { + --BBI; + // Skip over debugging info. + while (isa<DbgInfoIntrinsic>(BBI) || + (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { + if (BBI == OtherBB->begin()) + return false; + --BBI; + } + // If this isn't a store, isn't a store to the same location, or if the + // alignments differ, bail out.
+ OtherStore = dyn_cast<StoreInst>(BBI); + if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || + OtherStore->getAlignment() != SI.getAlignment()) + return false; + } else { + // Otherwise, the other block ended with a conditional branch. If one of the + // destinations is StoreBB, then we have the if/then case. + if (OtherBr->getSuccessor(0) != StoreBB && + OtherBr->getSuccessor(1) != StoreBB) + return false; + + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an + // if/then triangle. See if there is a store to the same ptr as SI that + // lives in OtherBB. + for (;; --BBI) { + // Check to see if we find the matching store. + if ((OtherStore = dyn_cast<StoreInst>(BBI))) { + if (OtherStore->getOperand(1) != SI.getOperand(1) || + OtherStore->getAlignment() != SI.getAlignment()) + return false; + break; + } + // If we find something that may be using or overwriting the stored + // value, or if we run out of instructions, we can't do the xform. + if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || + BBI == OtherBB->begin()) + return false; + } + + // In order to eliminate the store in OtherBB, we have to + // make sure nothing reads or overwrites the stored value in + // StoreBB. + for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { + // FIXME: This should really be AA driven. + if (I->mayReadFromMemory() || I->mayWriteToMemory()) + return false; + } + } + + // Insert a PHI node now if we need it. + Value *MergedVal = OtherStore->getOperand(0); + if (MergedVal != SI.getOperand(0)) { + PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); + PN->reserveOperandSpace(2); + PN->addIncoming(SI.getOperand(0), SI.getParent()); + PN->addIncoming(OtherStore->getOperand(0), OtherBB); + MergedVal = InsertNewInstBefore(PN, DestBB->front()); + } + + // Advance to a place where it is safe to insert the new store and + // insert it. + BBI = DestBB->getFirstNonPHI(); + InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1), + OtherStore->isVolatile(), + SI.getAlignment()), *BBI); + + // Nuke the old stores. + EraseInstFromFunction(SI); + EraseInstFromFunction(*OtherStore); + return true; +} diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp new file mode 100644 index 0000000..6afc0cd --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -0,0 +1,695 @@ +//===- InstCombineMulDivRem.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visit functions for mul, fmul, sdiv, udiv, fdiv, +// srem, urem, frem. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// SubOne - Subtract one from a ConstantInt. +static Constant *SubOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue()-1); +} + +/// MultiplyOverflows - True if the multiply cannot be represented in an +/// integer of this size.
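+/// For example, for i8: 16 * 16 = 256 overflows both signed and unsigned +/// i8, while 16 * 8 = 128 overflows signed i8 (max 127) but fits unsigned.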
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { + uint32_t W = C1->getBitWidth(); + APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); + if (sign) { + LHSExt.sext(W * 2); + RHSExt.sext(W * 2); + } else { + LHSExt.zext(W * 2); + RHSExt.zext(W * 2); + } + + APInt MulExt = LHSExt * RHSExt; + + if (!sign) + return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); + + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); + APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); + return MulExt.slt(Min) || MulExt.sgt(Max); +} + +Instruction *InstCombiner::visitMul(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (isa<UndefValue>(Op1)) // undef * X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // Simplify mul instructions with a constant RHS. + if (Constant *Op1C = dyn_cast<Constant>(Op1)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { + + // ((X << C1)*C2) == (X * (C2 << C1)) + if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) + if (SI->getOpcode() == Instruction::Shl) + if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) + return BinaryOperator::CreateMul(SI->getOperand(0), + ConstantExpr::getShl(CI, ShOp)); + + if (CI->isZero()) + return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 + if (CI->equalsInt(1)) // X * 1 == X + return ReplaceInstUsesWith(I, Op0); + if (CI->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + const APInt& Val = cast<ConstantInt>(CI)->getValue(); + if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C + return BinaryOperator::CreateShl(Op0, + ConstantInt::get(Op0->getType(), Val.logBase2())); + } + } else if (isa<VectorType>(Op1C->getType())) { + if (Op1C->isNullValue()) + return ReplaceInstUsesWith(I, Op1C); + + if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { + if (Op1V->isAllOnesValue()) // X * -1 == 0 - X + return BinaryOperator::CreateNeg(Op0, I.getName()); + + // As above, vector X*splat(1.0) -> X in all defined cases. + if (Constant *Splat = Op1V->getSplatValue()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) + if (CI->equalsInt(1)) + return ReplaceInstUsesWith(I, Op0); + } + } + } + + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) + if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && + isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { + // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. + Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); + Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); + return BinaryOperator::CreateAdd(Add, C1C2); + + } + + // Try to fold constant mul into select arguments. 
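MultiplyOverflows above widens both operands to double width, multiplies, and checks whether the product still fits in the original width. A standalone sketch of the same idea for 32-bit operands, using 64-bit arithmetic in place of APInt (the helper name is invented for illustration):

```c++
#include <cstdint>
#include <cstdio>

// Widen to double width, multiply, and test whether the product still
// fits the original width; multiplyOverflows32 is an illustrative
// stand-in for the APInt-based helper above.
static bool multiplyOverflows32(uint32_t a, uint32_t b, bool isSigned) {
  if (isSigned) {
    int64_t ext = (int64_t)(int32_t)a * (int64_t)(int32_t)b;
    return ext < INT32_MIN || ext > INT32_MAX;
  }
  uint64_t ext = (uint64_t)a * (uint64_t)b;
  return ext > UINT32_MAX;
}

int main() {
  std::printf("%d\n", multiplyOverflows32(0x10000u, 0x10000u, false)); // 1
  std::printf("%d\n", multiplyOverflows32(46340u, 46340u, true));      // 0
}
```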
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castNegVal(Op1)) + return BinaryOperator::CreateMul(Op0v, Op1v); + + // (X / Y) * Y = X - (X % Y) + // (X / Y) * -Y = (X % Y) - X + { + Value *Op1C = Op1; + BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); + if (!BO || + (BO->getOpcode() != Instruction::UDiv && + BO->getOpcode() != Instruction::SDiv)) { + Op1C = Op0; + BO = dyn_cast<BinaryOperator>(Op1); + } + Value *Neg = dyn_castNegVal(Op1C); + if (BO && BO->hasOneUse() && + (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && + (BO->getOpcode() == Instruction::UDiv || + BO->getOpcode() == Instruction::SDiv)) { + Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); + + // If the division is exact, X % Y is zero. + if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) + if (SDiv->isExact()) { + if (Op1BO == Op1C) + return ReplaceInstUsesWith(I, Op0BO); + return BinaryOperator::CreateNeg(Op0BO); + } + + Value *Rem; + if (BO->getOpcode() == Instruction::UDiv) + Rem = Builder->CreateURem(Op0BO, Op1BO); + else + Rem = Builder->CreateSRem(Op0BO, Op1BO); + Rem->takeName(BO); + + if (Op1BO == Op1C) + return BinaryOperator::CreateSub(Op0BO, Rem); + return BinaryOperator::CreateSub(Rem, Op0BO); + } + } + + /// i1 mul -> i1 and. + if (I.getType()->isInteger(1)) + return BinaryOperator::CreateAnd(Op0, Op1); + + // X*(1 << Y) --> X << Y + // (1 << Y)*X --> X << Y + { + Value *Y; + if (match(Op0, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op1, Y); + if (match(Op1, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op0, Y); + } + + // If one of the operands of the multiply is a cast from a boolean value, then + // we know the bool is either zero or one, so this is a 'masking' multiply. + // X * Y (where Y is 0 or 1) -> X & (0-Y) + if (!isa<VectorType>(I.getType())) { + // -2 is "-1 << 1" so it is all bits set except the low one. + APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); + + Value *BoolCast = 0, *OtherOp = 0; + if (MaskedValueIsZero(Op0, Negative2)) + BoolCast = Op0, OtherOp = Op1; + else if (MaskedValueIsZero(Op1, Negative2)) + BoolCast = Op1, OtherOp = Op0; + + if (BoolCast) { + Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + BoolCast, "tmp"); + return BinaryOperator::CreateAnd(V, OtherOp); + } + } + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitFMul(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Simplify mul instructions with a constant RHS... + if (Constant *Op1C = dyn_cast<Constant>(Op1)) { + if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { + // "In IEEE floating point, x*1 is not equivalent to x for nans. However, + // ANSI says we can drop signals, so we can do this anyway." (from GCC) + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + } else if (isa<VectorType>(Op1C->getType())) { + if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { + // As above, vector X*splat(1.0) -> X in all defined cases. 
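The tail of visitMul above relies on two identities: multiplying by a value known to be 0 or 1 is a masking `and` with its negation, and multiplying by 1 << Y is a shift by Y. A quick check of both, with arbitrary test values:

```c++
#include <cassert>
#include <cstdint>

int main() {
  // X * Y with Y in {0,1} equals X & (0 - Y): negating a boolean gives
  // either all-zero or all-one bits, i.e. a mask.
  const uint32_t xs[] = {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : xs)
    for (uint32_t y = 0; y <= 1; ++y)
      assert(x * y == (x & (0u - y)));

  // X * (1 << Y) equals X << Y (both wrap modulo 2^32 identically).
  for (uint32_t y = 0; y < 32; ++y)
    assert(5u * (1u << y) == (5u << y));
  return 0;
}
```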
+ if (Constant *Splat = Op1V->getSplatValue()) {
+ if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
+ }
+ }
+ }
+
+ // Try to fold constant mul into select arguments.
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
+
+ if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
+ if (Value *Op1v = dyn_castFNegVal(Op1))
+ return BinaryOperator::CreateFMul(Op0v, Op1v);
+
+ return Changed ? &I : 0;
+}
+
+/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
+/// instruction.
+bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
+ SelectInst *SI = cast<SelectInst>(I.getOperand(1));
+
+ // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
+ int NonNullOperand = -1;
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ if (ST->isNullValue())
+ NonNullOperand = 2;
+ // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
+ if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ if (ST->isNullValue())
+ NonNullOperand = 1;
+
+ if (NonNullOperand == -1)
+ return false;
+
+ Value *SelectCond = SI->getOperand(0);
+
+ // Change the div/rem to use 'Y' instead of the select.
+ I.setOperand(1, SI->getOperand(NonNullOperand));
+
+ // Okay, we know we can replace the operand of the div/rem with 'Y' with no
+ // problem. However, the select, or the condition of the select, may have
+ // multiple uses. Based on our knowledge that the operand must be non-zero,
+ // propagate the known value for the select into other uses of it, and
+ // propagate a known value of the condition into its other users.
+
+ // If the select and condition only have a single use, don't bother with this;
+ // early exit.
+ if (SI->use_empty() && SelectCond->hasOneUse())
+ return true;
+
+ // Scan the current block backward, looking for other uses of SI.
+ BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
+
+ while (BBI != BBFront) {
+ --BBI;
+ // If we found a call to a function, we can't assume it will return, so
+ // information from below it cannot be propagated above it.
+ if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
+ break;
+
+ // Replace uses of the select or its condition with the known values.
+ for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
+ I != E; ++I) {
+ if (*I == SI) {
+ *I = SI->getOperand(NonNullOperand);
+ Worklist.Add(BBI);
+ } else if (*I == SelectCond) {
+ *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
+ ConstantInt::getFalse(BBI->getContext());
+ Worklist.Add(BBI);
+ }
+ }
+
+ // If we are past the instruction, quit looking for it.
+ if (&*BBI == SI)
+ SI = 0;
+ if (&*BBI == SelectCond)
+ SelectCond = 0;
+
+ // If we ran out of things to eliminate, break out of the loop.
+ if (SelectCond == 0 && SI == 0)
+ break;
+
+ }
+ return true;
+}
+
+
+/// This function implements the transforms on div instructions that work
+/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
+/// used by the visitors to those instructions.
+/// @brief Transforms common to all three div instructions
+Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // undef / X -> 0 for integer.
+ // undef / X -> undef for FP (the undef could be a snan).
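SimplifyDivRemOfSelect above leans on the fact that division by zero is undefined: if one arm of the select divisor is a known zero, only the other arm can occur in a well-defined program, so the select can be bypassed. A sketch of that reasoning (names are illustrative):

```c++
#include <cassert>

// x / (cond ? 0 : y) is undefined whenever cond is true, so in any
// well-defined execution the divisor is y; the rewrite simply uses y.
int divOfSelect(int x, bool cond, int y) {
  (void)cond;  // the select no longer influences the divisor
  return x / y;
}

int main() {
  assert(divOfSelect(10, false, 5) == 2);
  return 0;
}
```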
+ if (isa<UndefValue>(Op0)) { + if (Op0->getType()->isFPOrFPVector()) + return ReplaceInstUsesWith(I, Op0); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + + // X / undef -> undef + if (isa<UndefValue>(Op1)) + return ReplaceInstUsesWith(I, Op1); + + return 0; +} + +/// This function implements the transforms common to both integer division +/// instructions (udiv and sdiv). It is called by the visitors to those integer +/// division instructions. +/// @brief Common integer divide transforms +Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // (sdiv X, X) --> 1 (udiv X, X) --> 1 + if (Op0 == Op1) { + if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { + Constant *CI = ConstantInt::get(Ty->getElementType(), 1); + std::vector<Constant*> Elts(Ty->getNumElements(), CI); + return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); + } + + Constant *CI = ConstantInt::get(I.getType(), 1); + return ReplaceInstUsesWith(I, CI); + } + + if (Instruction *Common = commonDivTransforms(I)) + return Common; + + // Handle cases involving: [su]div X, (select Cond, Y, Z) + // This does not apply for fdiv. + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + // div X, 1 == X + if (RHS->equalsInt(1)) + return ReplaceInstUsesWith(I, Op0); + + // (X / C1) / C2 -> X / (C1*C2) + if (Instruction *LHS = dyn_cast<Instruction>(Op0)) + if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) + if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) { + if (MultiplyOverflows(RHS, LHSRHS, + I.getOpcode()==Instruction::SDiv)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + else + return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), + ConstantExpr::getMul(RHS, LHSRHS)); + } + + if (!RHS->isZero()) { // avoid X udiv 0 + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + } + + // 0 / X == 0, we don't need to preserve faults! + if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0)) + if (LHS->equalsInt(0)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // It can't be division by zero, hence it must be division by one. + if (I.getType()->isInteger(1)) + return ReplaceInstUsesWith(I, Op0); + + if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { + if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue())) + // div X, 1 == X + if (X->isOne()) + return ReplaceInstUsesWith(I, Op0); + } + + return 0; +} + +Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; + + if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) { + // X udiv C^2 -> X >> C + // Check to see if this is an unsigned division with an exact power of 2, + // if so, convert to a right shift. 
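The (X / C1) / C2 fold above merges the constant divisors and folds to zero when their product overflows the type, which is the case MultiplyOverflows guards. A numeric check of both outcomes, with arbitrary values:

```c++
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 123456789u;

  // (X udiv C1) udiv C2 == X udiv (C1*C2) while C1*C2 fits the type.
  assert((x / 12u) / 10u == x / 120u);

  // When C1*C2 overflows (here 2^16 * 2^16 == 2^32), the combined
  // divisor exceeds any possible X, so the result folds to 0.
  assert((x / 0x10000u) / 0x10000u == 0u);
  return 0;
}
```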
+ if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 + return BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), C->getValue().logBase2())); + + // X udiv C, where C >= signbit + if (C->getValue().isNegative()) { + Value *IC = Builder->CreateICmpULT( Op0, C); + return SelectInst::Create(IC, Constant::getNullValue(I.getType()), + ConstantInt::get(I.getType(), 1)); + } + } + + // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) + if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) { + if (RHSI->getOpcode() == Instruction::Shl && + isa<ConstantInt>(RHSI->getOperand(0))) { + const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue(); + if (C1.isPowerOf2()) { + Value *N = RHSI->getOperand(1); + const Type *NTy = N->getType(); + if (uint32_t C2 = C1.logBase2()) + N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); + return BinaryOperator::CreateLShr(Op0, N); + } + } + } + + // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) + // where C1&C2 are powers of two. + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) + if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { + const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); + if (TVA.isPowerOf2() && FVA.isPowerOf2()) { + // Compute the shift amounts + uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); + // Construct the "on true" case of the select + Constant *TC = ConstantInt::get(Op0->getType(), TSA); + Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); + + // Construct the "on false" case of the select + Constant *FC = ConstantInt::get(Op0->getType(), FSA); + Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); + + // construct the select instruction and return it. + return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); + } + } + return 0; +} + +Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Handle the integer div common cases + if (Instruction *Common = commonIDivTransforms(I)) + return Common; + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + // sdiv X, -1 == -X + if (RHS->isAllOnesValue()) + return BinaryOperator::CreateNeg(Op0); + + // sdiv X, C --> ashr X, log2(C) + if (cast<SDivOperator>(&I)->isExact() && + RHS->getValue().isNonNegative() && + RHS->getValue().isPowerOf2()) { + Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), + RHS->getValue().exactLogBase2()); + return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); + } + + // -X/C --> X/-C provided the negation doesn't overflow. + if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) + if (isa<Constant>(Sub->getOperand(0)) && + cast<Constant>(Sub->getOperand(0))->isNullValue() && + Sub->hasNoSignedWrap()) + return BinaryOperator::CreateSDiv(Sub->getOperand(1), + ConstantExpr::getNeg(RHS)); + } + + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a udiv. 
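Both visitUDiv rewrites above reduce unsigned division to right shifts, including the divide-by-select-of-powers-of-two case. A small check with arbitrary constants:

```c++
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xCAFEF00Du;

  // X udiv (2^C) is X >> C for unsigned values.
  assert(x / 16u == x >> 4);

  // udiv X, (Cond ? 8 : 32) --> Cond ? (X >> 3) : (X >> 5)
  for (int cond = 0; cond <= 1; ++cond)
    assert(x / (cond ? 8u : 32u) == (cond ? x >> 3 : x >> 5));
  return 0;
}
```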
+ if (I.getType()->isInteger()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op0, Mask)) { + if (MaskedValueIsZero(Op1, Mask)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + ConstantInt *ShiftedInt; + if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && + ShiftedInt->getValue().isPowerOf2()) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { + return commonDivTransforms(I); +} + +/// This function implements the transforms on rem instructions that work +/// regardless of the kind of rem instruction it is (urem, srem, or frem). It +/// is used by the visitors to those instructions. +/// @brief Transforms common to all three rem instructions +Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (isa<UndefValue>(Op0)) { // undef % X -> 0 + if (I.getType()->isFPOrFPVector()) + return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + if (isa<UndefValue>(Op1)) + return ReplaceInstUsesWith(I, Op1); // X % undef -> undef + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + return 0; +} + +/// This function implements the transforms common to both integer remainder +/// instructions (urem and srem). It is called by the visitors to those integer +/// remainder instructions. +/// @brief Common integer remainder transforms +Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Instruction *common = commonRemTransforms(I)) + return common; + + // 0 % X == 0 for integer, we don't need to preserve faults! + if (Constant *LHS = dyn_cast<Constant>(Op0)) + if (LHS->isNullValue()) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + // X % 0 == undef, we don't need to preserve faults! + if (RHS->equalsInt(0)) + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); + + if (RHS->equalsInt(1)) // X % 1 == 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { + if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + } else if (isa<PHINode>(Op0I)) { + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + // See if we can fold away this rem instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; + } + } + + return 0; +} + +Instruction *InstCombiner::visitURem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Instruction *common = commonIRemTransforms(I)) + return common; + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { + // X urem C^2 -> X and C + // Check to see if this is an unsigned remainder with an exact power of 2, + // if so, convert to a bitwise and. 
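visitSDiv above turns an exact sdiv by a power of two into an arithmetic shift, and an sdiv whose operands both have clear sign bits into a udiv. Both identities are checked below; the sketch assumes the conventional arithmetic right shift of signed values:

```c++
#include <cassert>
#include <cstdint>

int main() {
  // Clear sign bits: signed and unsigned division agree.
  int32_t a = 1000, b = 40;
  assert(a / b == (int32_t)((uint32_t)a / (uint32_t)b));

  // Exact sdiv by 2^C is an arithmetic shift right, even when the
  // dividend is negative, because there is no remainder to round.
  int32_t n = -1024;
  assert(n / 4 == (n >> 2));
  return 0;
}
```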
+ if (ConstantInt *C = dyn_cast<ConstantInt>(RHS)) + if (C->getValue().isPowerOf2()) + return BinaryOperator::CreateAnd(Op0, SubOne(C)); + } + + if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + if (RHSI->getOpcode() == Instruction::Shl && + isa<ConstantInt>(RHSI->getOperand(0))) { + if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); + return BinaryOperator::CreateAnd(Op0, Add); + } + } + } + + // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2) + // where C1&C2 are powers of two. + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) { + if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) + if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { + // STO == 0 and SFO == 0 handled above. + if ((STO->getValue().isPowerOf2()) && + (SFO->getValue().isPowerOf2())) { + Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), + SI->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), + SI->getName()+".f"); + return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitSRem(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Handle the integer rem common cases + if (Instruction *Common = commonIRemTransforms(I)) + return Common; + + if (Value *RHSNeg = dyn_castNegVal(Op1)) + if (!isa<Constant>(RHSNeg) || + (isa<ConstantInt>(RHSNeg) && + cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) { + // X % -Y -> X % Y + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, RHSNeg); + return &I; + } + + // If the sign bits of both operands are zero (i.e. we can prove they are + // unsigned inputs), turn this into a urem. + if (I.getType()->isInteger()) { + APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); + if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); + } + } + + // If it's a constant vector, flip any negative values positive. 
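visitURem above reduces unsigned remainder by a power of two to a bit mask, including the select-of-powers-of-two form. A quick check with arbitrary constants:

```c++
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x12345678u;

  // X urem (2^C) keeps the low C bits: X & (2^C - 1).
  assert(x % 8u == (x & 7u));

  // urem X, (Cond ? 16 : 4) --> Cond ? (X & 15) : (X & 3)
  for (int cond = 0; cond <= 1; ++cond)
    assert(x % (cond ? 16u : 4u) == (cond ? (x & 15u) : (x & 3u)));
  return 0;
}
```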
+ if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) { + unsigned VWidth = RHSV->getNumOperands(); + + bool hasNegative = false; + for (unsigned i = 0; !hasNegative && i != VWidth; ++i) + if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) + if (RHS->getValue().isNegative()) + hasNegative = true; + + if (hasNegative) { + std::vector<Constant *> Elts(VWidth); + for (unsigned i = 0; i != VWidth; ++i) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { + if (RHS->getValue().isNegative()) + Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); + else + Elts[i] = RHS; + } + } + + Constant *NewRHSV = ConstantVector::get(Elts); + if (NewRHSV != RHSV) { + Worklist.AddValue(I.getOperand(1)); + I.setOperand(1, NewRHSV); + return &I; + } + } + } + + return 0; +} + +Instruction *InstCombiner::visitFRem(BinaryOperator &I) { + return commonRemTransforms(I); +} + diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp new file mode 100644 index 0000000..bb7632f --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -0,0 +1,841 @@ +//===- InstCombinePHI.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitPHINode function. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] +/// and if a/b/c and the add's all have a single use, turn this into a phi +/// and a single binop. +Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { + Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); + assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)); + unsigned Opc = FirstInst->getOpcode(); + Value *LHSVal = FirstInst->getOperand(0); + Value *RHSVal = FirstInst->getOperand(1); + + const Type *LHSType = LHSVal->getType(); + const Type *RHSType = RHSVal->getType(); + + // Scan to see if all operands are the same opcode, and all have one use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + if (!I || I->getOpcode() != Opc || !I->hasOneUse() || + // Verify type of the LHS matches so we don't fold cmp's of different + // types or GEP's with different index types. + I->getOperand(0)->getType() != LHSType || + I->getOperand(1)->getType() != RHSType) + return 0; + + // If they are CmpInst instructions, check their predicates + if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) + if (cast<CmpInst>(I)->getPredicate() != + cast<CmpInst>(FirstInst)->getPredicate()) + return 0; + + // Keep track of which operand needs a phi node. + if (I->getOperand(0) != LHSVal) LHSVal = 0; + if (I->getOperand(1) != RHSVal) RHSVal = 0; + } + + // If both LHS and RHS would need a PHI, don't do this transformation, + // because it would increase the number of PHIs entering the block, + // which leads to higher register pressure. This is especially + // bad when the PHIs are in the header of a loop. 
+ if (!LHSVal && !RHSVal) + return 0; + + // Otherwise, this is safe to transform! + + Value *InLHS = FirstInst->getOperand(0); + Value *InRHS = FirstInst->getOperand(1); + PHINode *NewLHS = 0, *NewRHS = 0; + if (LHSVal == 0) { + NewLHS = PHINode::Create(LHSType, + FirstInst->getOperand(0)->getName() + ".pn"); + NewLHS->reserveOperandSpace(PN.getNumOperands()/2); + NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); + InsertNewInstBefore(NewLHS, PN); + LHSVal = NewLHS; + } + + if (RHSVal == 0) { + NewRHS = PHINode::Create(RHSType, + FirstInst->getOperand(1)->getName() + ".pn"); + NewRHS->reserveOperandSpace(PN.getNumOperands()/2); + NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); + InsertNewInstBefore(NewRHS, PN); + RHSVal = NewRHS; + } + + // Add all operands to the new PHIs. + if (NewLHS || NewRHS) { + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i)); + if (NewLHS) { + Value *NewInLHS = InInst->getOperand(0); + NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); + } + if (NewRHS) { + Value *NewInRHS = InInst->getOperand(1); + NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); + } + } + } + + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) + return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); + CmpInst *CIOp = cast<CmpInst>(FirstInst); + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); +} + +Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { + GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); + + SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), + FirstInst->op_end()); + // This is true if all GEP bases are allocas and if all indices into them are + // constants. + bool AllBasePointersAreAllocas = true; + + // We don't want to replace this phi if the replacement would require + // more than one phi, which leads to higher register pressure. This is + // especially bad when the PHIs are in the header of a loop. + bool NeededPhi = false; + + // Scan to see if all operands are the same opcode, and all have one use. + for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { + GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); + if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || + GEP->getNumOperands() != FirstInst->getNumOperands()) + return 0; + + // Keep track of whether or not all GEPs are of alloca pointers. + if (AllBasePointersAreAllocas && + (!isa<AllocaInst>(GEP->getOperand(0)) || + !GEP->hasAllConstantIndices())) + AllBasePointersAreAllocas = false; + + // Compare the operand lists. + for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { + if (FirstInst->getOperand(op) == GEP->getOperand(op)) + continue; + + // Don't merge two GEPs when two operands differ (introducing phi nodes) + // if one of the PHIs has a constant for the index. The index may be + // substantially cheaper to compute for the constants, so making it a + // variable index could pessimize the path. This also handles the case + // for struct indices, which must always be constant. 
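FoldPHIArgBinOpIntoPHI above replaces a phi over binops that share one operand with a single phi feeding a single binop. In source form (the ternary stands in for the phi; the names are illustrative):

```c++
#include <cassert>

// phi over {a+b, a+c} with common operand 'a' becomes one phi over
// {b, c} (named "*.pn" above) feeding a single add.
int before(bool cond, int a, int b, int c) {
  return cond ? (a + b) : (a + c);  // an add in each predecessor
}

int after(bool cond, int a, int b, int c) {
  int pn = cond ? b : c;            // the merged phi
  return a + pn;                    // the one remaining add
}

int main() {
  assert(before(true, 1, 2, 3) == after(true, 1, 2, 3));
  assert(before(false, 1, 2, 3) == after(false, 1, 2, 3));
  return 0;
}
```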
+ if (isa<ConstantInt>(FirstInst->getOperand(op)) || + isa<ConstantInt>(GEP->getOperand(op))) + return 0; + + if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) + return 0; + + // If we already needed a PHI for an earlier operand, and another operand + // also requires a PHI, we'd be introducing more PHIs than we're + // eliminating, which increases register pressure on entry to the PHI's + // block. + if (NeededPhi) + return 0; + + FixedOperands[op] = 0; // Needs a PHI. + NeededPhi = true; + } + } + + // If all of the base pointers of the PHI'd GEPs are from allocas, don't + // bother doing this transformation. At best, this will just save a bit of + // offset calculation, but all the predecessors will have to materialize the + // stack address into a register anyway. We'd actually rather *clone* the + // load up into the predecessors so that we have a load of a gep of an alloca, + // which can usually all be folded into the load. + if (AllBasePointersAreAllocas) + return 0; + + // Otherwise, this is safe to transform. Insert PHI nodes for each operand + // that is variable. + SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); + + bool HasAnyPHIs = false; + for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { + if (FixedOperands[i]) continue; // operand doesn't need a phi. + Value *FirstOp = FirstInst->getOperand(i); + PHINode *NewPN = PHINode::Create(FirstOp->getType(), + FirstOp->getName()+".pn"); + InsertNewInstBefore(NewPN, PN); + + NewPN->reserveOperandSpace(e); + NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); + OperandPhis[i] = NewPN; + FixedOperands[i] = NewPN; + HasAnyPHIs = true; + } + + + // Add all operands to the new PHIs. + if (HasAnyPHIs) { + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); + BasicBlock *InBB = PN.getIncomingBlock(i); + + for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) + if (PHINode *OpPhi = OperandPhis[op]) + OpPhi->addIncoming(InGEP->getOperand(op), InBB); + } + } + + Value *Base = FixedOperands[0]; + return cast<GEPOperator>(FirstInst)->isInBounds() ? + GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, + FixedOperands.end()) : + GetElementPtrInst::Create(Base, FixedOperands.begin()+1, + FixedOperands.end()); +} + + +/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to +/// sink the load out of the block that defines it. This means that it must be +/// obvious the value of the load is not changed from the point of the load to +/// the end of the block it is in. +/// +/// Finally, it is safe, but not profitable, to sink a load targetting a +/// non-address-taken alloca. Doing so will cause us to not promote the alloca +/// to a register. +static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { + BasicBlock::iterator BBI = L, E = L->getParent()->end(); + + for (++BBI; BBI != E; ++BBI) + if (BBI->mayWriteToMemory()) + return false; + + // Check for non-address taken alloca. If not address-taken already, it isn't + // profitable to do this xform. + if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { + bool isAddressTaken = false; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) { + if (isa<LoadInst>(UI)) continue; + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + // If storing TO the alloca, then the address isn't taken. 
+ if (SI->getOperand(1) == AI) continue; + } + isAddressTaken = true; + break; + } + + if (!isAddressTaken && AI->isStaticAlloca()) + return false; + } + + // If this load is a load from a GEP with a constant offset from an alloca, + // then we don't want to sink it. In its present form, it will be + // load [constant stack offset]. Sinking it will cause us to have to + // materialize the stack addresses in each predecessor in a register only to + // do a shared load from register in the successor. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0))) + if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) + if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) + return false; + + return true; +} + +Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { + LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); + + // When processing loads, we need to propagate two bits of information to the + // sunk load: whether it is volatile, and what its alignment is. We currently + // don't sink loads when some have their alignment specified and some don't. + // visitLoadInst will propagate an alignment onto the load when TD is around, + // and if TD isn't around, we can't handle the mixed case. + bool isVolatile = FirstLI->isVolatile(); + unsigned LoadAlignment = FirstLI->getAlignment(); + + // We can't sink the load if the loaded value could be modified between the + // load and the PHI. + if (FirstLI->getParent() != PN.getIncomingBlock(0) || + !isSafeAndProfitableToSinkLoad(FirstLI)) + return 0; + + // If the PHI is of volatile loads and the load block has multiple + // successors, sinking it would remove a load of the volatile value from + // the path through the other successor. + if (isVolatile && + FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) + return 0; + + // Check to see if all arguments are the same operation. + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); + if (!LI || !LI->hasOneUse()) + return 0; + + // We can't sink the load if the loaded value could be modified between + // the load and the PHI. + if (LI->isVolatile() != isVolatile || + LI->getParent() != PN.getIncomingBlock(i) || + !isSafeAndProfitableToSinkLoad(LI)) + return 0; + + // If some of the loads have an alignment specified but not all of them, + // we can't do the transformation. + if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) + return 0; + + LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); + + // If the PHI is of volatile loads and the load block has multiple + // successors, sinking it would remove a load of the volatile value from + // the path through the other successor. + if (isVolatile && + LI->getParent()->getTerminator()->getNumSuccessors() != 1) + return 0; + } + + // Okay, they are all the same operation. Create a new PHI node of the + // correct type, and PHI together all of the LHS's of the instructions. + PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getName()+".in"); + NewPN->reserveOperandSpace(PN.getNumOperands()/2); + + Value *InVal = FirstLI->getOperand(0); + NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + + // Add all operands to the new PHI. 
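The load-sinking transform around isSafeAndProfitableToSinkLoad replaces a phi over loaded values with one load of a phi'd pointer, provided nothing in between can write memory. Its source-level shape (illustrative names):

```c++
#include <cassert>

// Two loads feeding a phi become a phi of the pointers plus one load.
int before(bool cond, const int *p, const int *q) {
  return cond ? *p : *q;        // a load in each predecessor, then a phi
}

int after(bool cond, const int *p, const int *q) {
  const int *in = cond ? p : q; // the new pointer phi (named ".in" above)
  return *in;                   // the single sunk load
}

int main() {
  int a = 7, b = 9;
  assert(before(true, &a, &b) == after(true, &a, &b));
  assert(before(false, &a, &b) == after(false, &a, &b));
  return 0;
}
```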
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); + if (NewInVal != InVal) + InVal = 0; + NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + } + + Value *PhiVal; + if (InVal) { + // The new PHI unions all of the same values together. This is really + // common, so we handle it intelligently here for compile-time speed. + PhiVal = InVal; + delete NewPN; + } else { + InsertNewInstBefore(NewPN, PN); + PhiVal = NewPN; + } + + // If this was a volatile load that we are merging, make sure to loop through + // and mark all the input loads as non-volatile. If we don't do this, we will + // insert a new volatile load and the old ones will not be deletable. + if (isVolatile) + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); + + return new LoadInst(PhiVal, "", isVolatile, LoadAlignment); +} + + + +/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" +/// operator and they all are only used by the PHI, PHI together their +/// inputs, and do the operation once, to the result of the PHI. +Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { + Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); + + if (isa<GetElementPtrInst>(FirstInst)) + return FoldPHIArgGEPIntoPHI(PN); + if (isa<LoadInst>(FirstInst)) + return FoldPHIArgLoadIntoPHI(PN); + + // Scan the instruction, looking for input operations that can be folded away. + // If all input operands to the phi are the same instruction (e.g. a cast from + // the same type or "+42") we can pull the operation through the PHI, reducing + // code size and simplifying code. + Constant *ConstantOp = 0; + const Type *CastSrcTy = 0; + + if (isa<CastInst>(FirstInst)) { + CastSrcTy = FirstInst->getOperand(0)->getType(); + + // Be careful about transforming integer PHIs. We don't want to pessimize + // the code by turning an i32 into an i1293. + if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { + if (!ShouldChangeType(PN.getType(), CastSrcTy)) + return 0; + } + } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { + // Can fold binop, compare or shift here if the RHS is a constant, + // otherwise call FoldPHIArgBinOpIntoPHI. + ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); + if (ConstantOp == 0) + return FoldPHIArgBinOpIntoPHI(PN); + } else { + return 0; // Cannot fold this operation. + } + + // Check to see if all arguments are the same operation. + for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); + if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) + return 0; + if (CastSrcTy) { + if (I->getOperand(0)->getType() != CastSrcTy) + return 0; // Cast operation must match. + } else if (I->getOperand(1) != ConstantOp) { + return 0; + } + } + + // Okay, they are all the same operation. Create a new PHI node of the + // correct type, and PHI together all of the LHS's of the instructions. + PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), + PN.getName()+".in"); + NewPN->reserveOperandSpace(PN.getNumOperands()/2); + + Value *InVal = FirstInst->getOperand(0); + NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + + // Add all operands to the new PHI. 
+ for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { + Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); + if (NewInVal != InVal) + InVal = 0; + NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); + } + + Value *PhiVal; + if (InVal) { + // The new PHI unions all of the same values together. This is really + // common, so we handle it intelligently here for compile-time speed. + PhiVal = InVal; + delete NewPN; + } else { + InsertNewInstBefore(NewPN, PN); + PhiVal = NewPN; + } + + // Insert and return the new operation. + if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) + return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); + + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) + return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); + + CmpInst *CIOp = cast<CmpInst>(FirstInst); + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + PhiVal, ConstantOp); +} + +/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle +/// that is dead. +static bool DeadPHICycle(PHINode *PN, + SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) { + if (PN->use_empty()) return true; + if (!PN->hasOneUse()) return false; + + // Remember this node, and if we find the cycle, return. + if (!PotentiallyDeadPHIs.insert(PN)) + return true; + + // Don't scan crazily complex things. + if (PotentiallyDeadPHIs.size() == 16) + return false; + + if (PHINode *PU = dyn_cast<PHINode>(PN->use_back())) + return DeadPHICycle(PU, PotentiallyDeadPHIs); + + return false; +} + +/// PHIsEqualValue - Return true if this phi node is always equal to +/// NonPhiInVal. This happens with mutually cyclic phi nodes like: +/// z = some value; x = phi (y, z); y = phi (x, z) +static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, + SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) { + // See if we already saw this PHI node. + if (!ValueEqualPHIs.insert(PN)) + return true; + + // Don't scan crazily complex things. + if (ValueEqualPHIs.size() == 16) + return false; + + // Scan the operands to see if they are either phi nodes or are equal to + // the value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Op = PN->getIncomingValue(i); + if (PHINode *OpPN = dyn_cast<PHINode>(Op)) { + if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) + return false; + } else if (Op != NonPhiInVal) + return false; + } + + return true; +} + + +namespace { +struct PHIUsageRecord { + unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) + unsigned Shift; // The amount shifted. + Instruction *Inst; // The trunc instruction. + + PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) + : PHIId(pn), Shift(Sh), Inst(User) {} + + bool operator<(const PHIUsageRecord &RHS) const { + if (PHIId < RHS.PHIId) return true; + if (PHIId > RHS.PHIId) return false; + if (Shift < RHS.Shift) return true; + if (Shift > RHS.Shift) return false; + return Inst->getType()->getPrimitiveSizeInBits() < + RHS.Inst->getType()->getPrimitiveSizeInBits(); + } +}; + +struct LoweredPHIRecord { + PHINode *PN; // The PHI that was lowered. + unsigned Shift; // The amount shifted. + unsigned Width; // The width extracted. + + LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) + : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} + + // Ctor form used by DenseMap. 
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ };
+ template <>
+ struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes that values are extracted from. PHIsInspected is a set we use to
+ // avoid revisiting PHIs; PHIsToSlice is an ordered list of PHIs that we need
+ // to check the uses of (to ensure they are all extracts).
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ // Scan the input list of the PHI. If any input is an invoke, and if the
+ // input is defined in the predecessor, then we won't be able to split the
+ // critical edge that is required to insert a truncate. Because of this, we
+ // have to bail out.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
+ if (II == 0) continue;
+ if (II->getParent() != PN->getIncomingBlock(i))
+ continue;
+
+ // If we have a phi, and if it's directly in the predecessor, then we have
+ // a critical edge where we need to put the truncate. Since we can't
+ // split the edge in instcombine, we have to bail out.
+ return 0;
+ }
+
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr || + !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || + !isa<ConstantInt>(User->getOperand(1))) + return 0; + + unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); + PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); + } + } + + // If we have no users, they must be all self uses, just nuke the PHI. + if (PHIUsers.empty()) + return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); + + // If this phi node is transformable, create new PHIs for all the pieces + // extracted out of it. First, sort the users by their offset and size. + array_pod_sort(PHIUsers.begin(), PHIUsers.end()); + + DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; + ); + + // PredValues - This is a temporary used when rewriting PHI nodes. It is + // hoisted out here to avoid construction/destruction thrashing. + DenseMap<BasicBlock*, Value*> PredValues; + + // ExtractedVals - Each new PHI we introduce is saved here so we don't + // introduce redundant PHIs. + DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; + + for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { + unsigned PHIId = PHIUsers[UserI].PHIId; + PHINode *PN = PHIsToSlice[PHIId]; + unsigned Offset = PHIUsers[UserI].Shift; + const Type *Ty = PHIUsers[UserI].Inst->getType(); + + PHINode *EltPHI; + + // If we've already lowered a user like this, reuse the previously lowered + // value. + if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { + + // Otherwise, Create the new PHI node for this user. + EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); + assert(EltPHI->getType() != PN->getType() && + "Truncate didn't shrink phi?"); + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + Value *&PredVal = PredValues[Pred]; + + // If we already have a value for this predecessor, reuse it. + if (PredVal) { + EltPHI->addIncoming(PredVal, Pred); + continue; + } + + // Handle the PHI self-reuse case. + Value *InVal = PN->getIncomingValue(i); + if (InVal == PN) { + PredVal = EltPHI; + EltPHI->addIncoming(PredVal, Pred); + continue; + } + + if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { + // If the incoming value was a PHI, and if it was one of the PHIs we + // already rewrote it, just use the lowered value. + if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { + PredVal = Res; + EltPHI->addIncoming(PredVal, Pred); + continue; + } + } + + // Otherwise, do an extract in the predecessor. + Builder->SetInsertPoint(Pred, Pred->getTerminator()); + Value *Res = InVal; + if (Offset) + Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), + Offset), "extract"); + Res = Builder->CreateTrunc(Res, Ty, "extract.t"); + PredVal = Res; + EltPHI->addIncoming(Res, Pred); + + // If the incoming value was a PHI, and if it was one of the PHIs we are + // rewriting, we will ultimately delete the code we inserted. This + // means we need to revisit that PHI to make sure we extract out the + // needed piece. 
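Each slice produced above is materialized in the predecessor as an lshr by the slice offset followed by a trunc. The extraction arithmetic itself, on a sample i64 value:

```c++
#include <cassert>
#include <cstdint>

int main() {
  // An i64 such as SROA produces; each user becomes a narrow phi fed
  // by lshr-by-offset + trunc done in the predecessor blocks.
  uint64_t wide = 0x1122334455667788ull;

  uint32_t lo = (uint32_t)wide;          // trunc            (Offset 0)
  uint32_t hi = (uint32_t)(wide >> 32);  // lshr 32 + trunc  (Offset 32)

  assert(lo == 0x55667788u && hi == 0x11223344u);
  return 0;
}
```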
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) + if (PHIsInspected.count(OldInVal)) { + unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), + OldInVal)-PHIsToSlice.begin(); + PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, + cast<Instruction>(Res))); + ++UserE; + } + } + PredValues.clear(); + + DEBUG(errs() << " Made element PHI for offset " << Offset << ": " + << *EltPHI << '\n'); + ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; + } + + // Replace the use of this piece with the PHI node. + ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); + } + + // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) + // with undefs. + Value *Undef = UndefValue::get(FirstPhi.getType()); + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + ReplaceInstUsesWith(*PHIsToSlice[i], Undef); + return ReplaceInstUsesWith(FirstPhi, Undef); +} + +// PHINode simplification +// +Instruction *InstCombiner::visitPHINode(PHINode &PN) { + // If LCSSA is around, don't mess with Phi nodes + if (MustPreserveLCSSA) return 0; + + if (Value *V = PN.hasConstantValue()) + return ReplaceInstUsesWith(PN, V); + + // If all PHI operands are the same operation, pull them through the PHI, + // reducing code size. + if (isa<Instruction>(PN.getIncomingValue(0)) && + isa<Instruction>(PN.getIncomingValue(1)) && + cast<Instruction>(PN.getIncomingValue(0))->getOpcode() == + cast<Instruction>(PN.getIncomingValue(1))->getOpcode() && + // FIXME: The hasOneUse check will fail for PHIs that use the value more + // than themselves more than once. + PN.getIncomingValue(0)->hasOneUse()) + if (Instruction *Result = FoldPHIArgOpIntoPHI(PN)) + return Result; + + // If this is a trivial cycle in the PHI node graph, remove it. Basically, if + // this PHI only has a single use (a PHI), and if that PHI only has one use (a + // PHI)... break the cycle. + if (PN.hasOneUse()) { + Instruction *PHIUser = cast<Instruction>(PN.use_back()); + if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { + SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs; + PotentiallyDeadPHIs.insert(&PN); + if (DeadPHICycle(PU, PotentiallyDeadPHIs)) + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); + } + + // If this phi has a single use, and if that use just computes a value for + // the next iteration of a loop, delete the phi. This occurs with unused + // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this + // common case here is good because the only other things that catch this + // are induction variable analysis (sometimes) and ADCE, which is only run + // late. + if (PHIUser->hasOneUse() && + (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) && + PHIUser->use_back() == &PN) { + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); + } + } + + // We sometimes end up with phi cycles that non-obviously end up being the + // same value, for example: + // z = some value; x = phi (y, z); y = phi (x, z) + // where the phi nodes don't necessarily need to be in the same block. Do a + // quick check to see if the PHI node only contains a single non-phi value, if + // so, scan to see if the phi cycle is actually equal to that value. + { + unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); + // Scan for the first non-phi operand. 
+ while (InValNo != NumOperandVals && + isa<PHINode>(PN.getIncomingValue(InValNo))) + ++InValNo; + + if (InValNo != NumOperandVals) { + Value *NonPhiInVal = PN.getOperand(InValNo); + + // Scan the rest of the operands to see if there are any conflicts, if so + // there is no need to recursively scan other phis. + for (++InValNo; InValNo != NumOperandVals; ++InValNo) { + Value *OpVal = PN.getIncomingValue(InValNo); + if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) + break; + } + + // If we scanned over all operands, then we have one unique value plus + // phi values. Scan PHI nodes to see if they all merge in each other or + // the value. + if (InValNo == NumOperandVals) { + SmallPtrSet<PHINode*, 16> ValueEqualPHIs; + if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) + return ReplaceInstUsesWith(PN, NonPhiInVal); + } + } + } + + // If there are multiple PHIs, sort their operands so that they all list + // the blocks in the same order. This will help identical PHIs be eliminated + // by other passes. Other passes shouldn't depend on this for correctness + // however. + PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin()); + if (&PN != FirstPN) + for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *BBA = PN.getIncomingBlock(i); + BasicBlock *BBB = FirstPN->getIncomingBlock(i); + if (BBA != BBB) { + Value *VA = PN.getIncomingValue(i); + unsigned j = PN.getBasicBlockIndex(BBB); + Value *VB = PN.getIncomingValue(j); + PN.setIncomingBlock(i, BBB); + PN.setIncomingValue(i, VB); + PN.setIncomingBlock(j, BBA); + PN.setIncomingValue(j, VA); + // NOTE: Instcombine normally would want us to "return &PN" if we + // modified any of the operands of an instruction. However, since we + // aren't adding or removing uses (just rearranging them) we don't do + // this in this case. + } + } + + // If this is an integer PHI and we know that it has an illegal type, see if + // it is only used by trunc or trunc(lshr) operations. If so, we split the + // PHI into the various pieces being extracted. This sort of thing is + // introduced when SROA promotes an aggregate to a single large integer type. + if (isa<IntegerType>(PN.getType()) && TD && + !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) + if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) + return Res; + + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp new file mode 100644 index 0000000..18b2dff --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -0,0 +1,703 @@ +//===- InstCombineSelect.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitSelect function. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, +/// returning the kind and providing the out parameter results if we +/// successfully match. 
+static SelectPatternFlavor +MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { + SelectInst *SI = dyn_cast<SelectInst>(V); + if (SI == 0) return SPF_UNKNOWN; + + ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); + if (ICI == 0) return SPF_UNKNOWN; + + LHS = ICI->getOperand(0); + RHS = ICI->getOperand(1); + + // (icmp X, Y) ? X : Y + if (SI->getTrueValue() == ICI->getOperand(0) && + SI->getFalseValue() == ICI->getOperand(1)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMAX; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMAX; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMIN; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMIN; + } + } + + // (icmp X, Y) ? Y : X + if (SI->getTrueValue() == ICI->getOperand(1) && + SI->getFalseValue() == ICI->getOperand(0)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMIN; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMIN; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMAX; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMAX; + } + } + + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) + + return SPF_UNKNOWN; +} + + +/// GetSelectFoldableOperands - We want to turn code that looks like this: +/// %C = or %A, %B +/// %D = select %cond, %C, %A +/// into: +/// %C = select %cond, %B, 0 +/// %D = or %A, %C +/// +/// Assuming that the specified instruction is an operand to the select, return +/// a bitmask indicating which operands of this instruction are foldable if they +/// equal the other incoming value of the select. +/// +static unsigned GetSelectFoldableOperands(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return 3; // Can fold through either operand. + case Instruction::Sub: // Can only fold on the amount subtracted. + case Instruction::Shl: // Can only fold on the shift amount. + case Instruction::LShr: + case Instruction::AShr: + return 1; + default: + return 0; // Cannot fold + } +} + +/// GetSelectFoldableConstant - For the same transformation as the previous +/// function, return the identity constant that goes into the select. +static Constant *GetSelectFoldableConstant(Instruction *I) { + switch (I->getOpcode()) { + default: llvm_unreachable("This cannot happen!"); + case Instruction::Add: + case Instruction::Sub: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + return Constant::getNullValue(I->getType()); + case Instruction::And: + return Constant::getAllOnesValue(I->getType()); + case Instruction::Mul: + return ConstantInt::get(I->getType(), 1); + } +} + +/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI +/// have the same opcode and only one use each. Try to simplify this. +Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, + Instruction *FI) { + if (TI->getNumOperands() == 1) { + // If this is a non-volatile load or a cast from the same type, + // merge. + if (TI->isCast()) { + if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) + return 0; + } else { + return 0; // unknown unary op. 
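MatchSelectPattern above classifies a compare-plus-select purely by the icmp predicate and whether the select arms match the compare operands in order or swapped. Two of the flavors written out directly (helper names are illustrative):

```c++
#include <cassert>
#include <cstdint>

static int32_t smax(int32_t x, int32_t y) { return x > y ? x : y; }    // SPF_SMAX
static uint32_t umin(uint32_t x, uint32_t y) { return x < y ? x : y; } // SPF_UMIN

int main() {
  int32_t x = -3, y = 7;
  assert(smax(x, y) == 7);
  // Swapped arms flip the flavor: (x < y) ? y : x is also a signed max.
  assert((x < y ? y : x) == smax(x, y));
  assert(umin(4u, 9u) == 4u);
  return 0;
}
```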
+ }
+
+ // Fold this by inserting a select from the input values.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ TI->getType());
+ }
+
+ // Only handle binary operators here.
+ if (!isa<BinaryOperator>(TI))
+ return 0;
+
+ // Figure out if the operations have any operands in common.
+ Value *MatchOp, *OtherOpT, *OtherOpF;
+ bool MatchIsOpZero;
+ if (TI->getOperand(0) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = false;
+ } else if (!TI->isCommutative()) {
+ return 0;
+ } else if (TI->getOperand(0) == FI->getOperand(1)) {
+ MatchOp = TI->getOperand(0);
+ OtherOpT = TI->getOperand(1);
+ OtherOpF = FI->getOperand(0);
+ MatchIsOpZero = true;
+ } else if (TI->getOperand(1) == FI->getOperand(0)) {
+ MatchOp = TI->getOperand(1);
+ OtherOpT = TI->getOperand(0);
+ OtherOpF = FI->getOperand(1);
+ MatchIsOpZero = true;
+ } else {
+ return 0;
+ }
+
+ // If we reach here, they do have operations in common.
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
+ InsertNewInstBefore(NewSI, SI);
+
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+ if (MatchIsOpZero)
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
+ else
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
+ }
+ llvm_unreachable("Shouldn't get here");
+ return 0;
+}
+
+static bool isSelect01(Constant *C1, Constant *C2) {
+ ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
+ if (!C1I)
+ return false;
+ ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
+ if (!C2I)
+ return false;
+ return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
+}
+
+/// FoldSelectIntoOp - Try to fold the select into one of the operands to
+/// facilitate further optimization.
+Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+ Value *FalseVal) {
+ // See the comment above GetSelectFoldableOperands for a description of the
+ // transformation we are doing here.
+ if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
+ if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
+ !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
+ OpToFold = 1;
+ } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
+ OpToFold = 2;
+ }
+
+ if (OpToFold) {
+ Constant *C = GetSelectFoldableConstant(TVI);
+ Value *OOp = TVI->getOperand(2-OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0 and 1.
+ if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { + Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); + InsertNewInstBefore(NewSel, SI); + NewSel->takeName(TVI); + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) + return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); + llvm_unreachable("Unknown instruction!!"); + } + } + } + } + } + + if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) { + if (FVI->hasOneUse() && FVI->getNumOperands() == 2 && + !isa<Constant>(TrueVal)) { + if (unsigned SFO = GetSelectFoldableOperands(FVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { + OpToFold = 1; + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + OpToFold = 2; + } + + if (OpToFold) { + Constant *C = GetSelectFoldableConstant(FVI); + Value *OOp = FVI->getOperand(2-OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0 and 1. + if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { + Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); + InsertNewInstBefore(NewSel, SI); + NewSel->takeName(FVI); + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) + return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); + llvm_unreachable("Unknown instruction!!"); + } + } + } + } + } + + return 0; +} + +/// visitSelectInstWithICmp - Visit a SelectInst that has an +/// ICmpInst as its first operand. +/// +Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, + ICmpInst *ICI) { + bool Changed = false; + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + // Check cases where the comparison is with a constant that + // can be adjusted to fit the min/max idiom. We may edit ICI in + // place here, so make sure the select is the only user. + if (ICI->hasOneUse()) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { + switch (Pred) { + default: break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: { + // X < MIN ? T : F --> F + if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X < C ? X : C-1 --> X > C-1 ? C-1 : X + Constant *AdjustedRHS = + ConstantInt::get(CI->getContext(), CI->getValue()-1); + if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || + (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { + Pred = ICmpInst::getSwappedPredicate(Pred); + CmpRHS = AdjustedRHS; + std::swap(FalseVal, TrueVal); + ICI->setPredicate(Pred); + ICI->setOperand(1, CmpRHS); + SI.setOperand(1, TrueVal); + SI.setOperand(2, FalseVal); + Changed = true; + } + break; + } + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: { + // X > MAX ? T : F --> F + if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) + return ReplaceInstUsesWith(SI, FalseVal); + // X > C ? X : C+1 --> X < C+1 ? C+1 : X + Constant *AdjustedRHS = + ConstantInt::get(CI->getContext(), CI->getValue()+1); + if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || + (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { + Pred = ICmpInst::getSwappedPredicate(Pred); + CmpRHS = AdjustedRHS; + std::swap(FalseVal, TrueVal); + ICI->setPredicate(Pred); + ICI->setOperand(1, CmpRHS); + SI.setOperand(1, TrueVal); + SI.setOperand(2, FalseVal); + Changed = true; + } + break; + } + } + + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed + // (x >s -1) ? 
-1 : 0 -> not (ashr x, 31) -> all ones if not signed
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ if (match(TrueVal, m_ConstantInt<-1>()) &&
+ match(FalseVal, m_ConstantInt<0>()))
+ Pred = ICI->getPredicate();
+ else if (match(TrueVal, m_ConstantInt<0>()) &&
+ match(FalseVal, m_ConstantInt<-1>()))
+ Pred = CmpInst::getInversePredicate(ICI->getPredicate());
+
+ if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
+ // If we are selecting between all-ones and zero based on a sign test,
+ // we can replace the select with an ashr that smears the sign bit
+ // across the whole value, avoiding the comparison entirely.
+ const APInt &Op1CV = CI->getValue();
+
+ // (x <s 0) ? -1 : 0 --> x >>s 31: all ones if the sign bit is set.
+ // (x >s -1) ? -1 : 0 --> not (x >>s 31): all ones if the sign bit is clear.
+ if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getScalarSizeInBits()-1);
+ In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
+ In->getName()+".lobit"),
+ *ICI);
+ if (In->getType() != SI.getType())
+ In = CastInst::CreateIntegerCast(In, SI.getType(),
+ true/*SExt*/, "tmp", ICI);
+
+ if (Pred == ICmpInst::ICMP_SGT)
+ In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
+ In->getName()+".not"), *ICI);
+
+ return ReplaceInstUsesWith(SI, In);
+ }
+ }
+ }
+
+ if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? X : Y -> X
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+
+ } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) {
+ // Transform (X == Y) ? Y : X -> X
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // Transform (X != Y) ? Y : X -> Y
+ if (Pred == ICmpInst::ICMP_NE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ /// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+ }
+ return Changed ? &SI : 0;
+}
+
+
+/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a
+/// PHI node (but the two may be in different blocks). See if the true/false
+/// values (V) are live in all of the predecessor blocks of the PHI. For
+/// example, cases like this cannot be mapped:
+///
+/// X = phi [ C1, BB1], [C2, BB2]
+/// Y = add
+/// Z = select X, Y, 0
+///
+/// because Y is not live in BB1/BB2.
+///
+static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+ const SelectInst &SI) {
+ // If the value is a non-instruction value like a constant or argument, it
+ // can always be mapped.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return true;
+
+ // If V is a PHI node defined in the same block as the condition PHI, we can
+ // map the arguments.
+ const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
+
+ if (const PHINode *VP = dyn_cast<PHINode>(I))
+ if (VP->getParent() == CondPHI->getParent())
+ return true;
+
+ // Otherwise, if the PHI and select are defined in the same block and if V is
+ // defined in a different block, then we can transform it.
+ if (SI.getParent() == CondPHI->getParent() &&
+ I->getParent() != CondPHI->getParent())
+ return true;
+
+ // Otherwise we have a 'hard' case and we can't tell without doing more
+ // detailed dominator based analysis, punt.
+ return false;
+}
+
+/// FoldSPFofSPF - We have an SPF (e.g.
a min or max) of an SPF of the form: +/// SPF2(SPF1(A, B), C) +Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, + SelectPatternFlavor SPF1, + Value *A, Value *B, + Instruction &Outer, + SelectPatternFlavor SPF2, Value *C) { + if (C == A || C == B) { + // MAX(MAX(A, B), B) -> MAX(A, B) + // MIN(MIN(a, b), a) -> MIN(a, b) + if (SPF1 == SPF2) + return ReplaceInstUsesWith(Outer, Inner); + + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || + (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || + (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || + (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) + return ReplaceInstUsesWith(Outer, C); + } + + // TODO: MIN(MIN(A, 23), 97) + return 0; +} + + + + +Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { + Value *CondVal = SI.getCondition(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + + // select true, X, Y -> X + // select false, X, Y -> Y + if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal)) + return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal); + + // select C, X, X -> X + if (TrueVal == FalseVal) + return ReplaceInstUsesWith(SI, TrueVal); + + if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X + return ReplaceInstUsesWith(SI, FalseVal); + if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X + return ReplaceInstUsesWith(SI, TrueVal); + if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y + if (isa<Constant>(TrueVal)) + return ReplaceInstUsesWith(SI, TrueVal); + else + return ReplaceInstUsesWith(SI, FalseVal); + } + + if (SI.getType()->isInteger(1)) { + if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { + if (C->getZExtValue()) { + // Change: A = select B, true, C --> A = or B, C + return BinaryOperator::CreateOr(CondVal, FalseVal); + } else { + // Change: A = select B, false, C --> A = and !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return BinaryOperator::CreateAnd(NotCond, FalseVal); + } + } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { + if (C->getZExtValue() == false) { + // Change: A = select B, C, false --> A = and B, C + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } else { + // Change: A = select B, C, true --> A = or !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return BinaryOperator::CreateOr(NotCond, TrueVal); + } + } + + // select a, b, a -> a&b + // select a, a, b -> a|b + if (CondVal == TrueVal) + return BinaryOperator::CreateOr(CondVal, FalseVal); + else if (CondVal == FalseVal) + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } + + // Selecting between two integer constants? 
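The i1 select folds above can be checked exhaustively; a minimal standalone sketch in plain C++ (bools standing in for i1 values):

#include <cassert>

int main() {
  for (int B = 0; B <= 1; ++B)
    for (int C = 0; C <= 1; ++C) {
      assert((B ? true : bool(C)) == bool(B | C));    // select B, true, C  -> or B, C
      assert((B ? bool(C) : false) == bool(B & C));   // select B, C, false -> and B, C
      assert((B ? false : bool(C)) == bool(!B & C));  // select B, false, C -> and !B, C
      assert((B ? bool(C) : true) == bool(!B | C));   // select B, C, true  -> or !B, C
    }
  return 0;
}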
+ if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
+ if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
+ // select C, 1, 0 -> zext C to int
+ if (FalseValC->isZero() && TrueValC->getValue() == 1) {
+ return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
+ } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
+ // select C, 0, 1 -> zext !C to int
+ Value *NotCond =
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
+ "not."+CondVal->getName()), SI);
+ return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
+ }
+
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
+ // If one of the constants is zero (we know they can't both be) and we
+ // have an icmp instruction with zero, and we have an 'and' with the
+ // non-constant value, eliminate this whole mess. This corresponds to
+ // cases like this: ((X & 8) ? 8 : 0), where the and'd constant must be
+ // a power of two.
+ if (TrueValC->isZero() || FalseValC->isZero())
+ if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
+ cast<Constant>(IC->getOperand(1))->isNullValue())
+ if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
+ if (ICA->getOpcode() == Instruction::And &&
+ isa<ConstantInt>(ICA->getOperand(1)) &&
+ (ICA->getOperand(1) == TrueValC ||
+ ICA->getOperand(1) == FalseValC) &&
+ cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) {
+ // Okay, now we know that everything is set up, we just don't
+ // know whether we have an icmp_ne or icmp_eq and whether the
+ // true or false val is the zero.
+ bool ShouldNotVal = !TrueValC->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ Value *V = ICA;
+ if (ShouldNotVal)
+ V = InsertNewInstBefore(BinaryOperator::Create(
+ Instruction::Xor, V, ICA->getOperand(1)), SI);
+ return ReplaceInstUsesWith(SI, V);
+ }
+ }
+ }
+
+ // See if we are selecting two values based on a comparison of the two values.
+ if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
+ if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
+ // Transform (X == Y) ? X : Y -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X != Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
+ return ReplaceInstUsesWith(SI, TrueVal);
+ // NOTE: if we wanted to, this is where to detect MIN/MAX
+
+ } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
+ // Transform (X == Y) ? Y : X -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ }
+ // Transform (X != Y) ?
Y : X -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_ONE) + return ReplaceInstUsesWith(SI, TrueVal); + // NOTE: if we wanted to, this is where to detect MIN/MAX + } + // NOTE: if we wanted to, this is where to detect ABS + } + + // See if we are selecting two values based on a comparison of the two values. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal)) + if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) + return Result; + + if (Instruction *TI = dyn_cast<Instruction>(TrueVal)) + if (Instruction *FI = dyn_cast<Instruction>(FalseVal)) + if (TI->hasOneUse() && FI->hasOneUse()) { + Instruction *AddOp = 0, *SubOp = 0; + + // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) + if (TI->getOpcode() == FI->getOpcode()) + if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) + return IV; + + // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is + // even legal for FP. + if ((TI->getOpcode() == Instruction::Sub && + FI->getOpcode() == Instruction::Add) || + (TI->getOpcode() == Instruction::FSub && + FI->getOpcode() == Instruction::FAdd)) { + AddOp = FI; SubOp = TI; + } else if ((FI->getOpcode() == Instruction::Sub && + TI->getOpcode() == Instruction::Add) || + (FI->getOpcode() == Instruction::FSub && + TI->getOpcode() == Instruction::FAdd)) { + AddOp = TI; SubOp = FI; + } + + if (AddOp) { + Value *OtherAddOp = 0; + if (SubOp->getOperand(0) == AddOp->getOperand(0)) { + OtherAddOp = AddOp->getOperand(1); + } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { + OtherAddOp = AddOp->getOperand(0); + } + + if (OtherAddOp) { + // So at this point we know we have (Y -> OtherAddOp): + // select C, (add X, Y), (sub X, Z) + Value *NegVal; // Compute -Z + if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { + NegVal = ConstantExpr::getNeg(C); + } else { + NegVal = InsertNewInstBefore( + BinaryOperator::CreateNeg(SubOp->getOperand(1), + "tmp"), SI); + } + + Value *NewTrueOp = OtherAddOp; + Value *NewFalseOp = NegVal; + if (AddOp != TI) + std::swap(NewTrueOp, NewFalseOp); + Instruction *NewSel = + SelectInst::Create(CondVal, NewTrueOp, + NewFalseOp, SI.getName() + ".p"); + + NewSel = InsertNewInstBefore(NewSel, SI); + return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); + } + } + } + + // See if we can fold the select into one of our operands. + if (SI.getType()->isInteger()) { + if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) + return FoldI; + + // MAX(MAX(a, b), a) -> MAX(a, b) + // MIN(MIN(a, b), a) -> MIN(a, b) + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + Value *LHS, *RHS, *LHS2, *RHS2; + if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { + if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, + SI, SPF, RHS)) + return R; + if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2, + SI, SPF, LHS)) + return R; + } + + // TODO. + // ABS(-X) -> ABS(X) + // ABS(ABS(X)) -> ABS(X) + } + + // See if we can fold the select into a phi node if the condition is a select. + if (isa<PHINode>(SI.getCondition())) + // The true/false values have to be live in the PHI predecessor's blocks. 
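A standalone sketch of why the FCMP_OEQ folds above demand a nonzero constant operand, assuming IEEE-754 doubles: negative and positive zero compare equal yet are observably different values.

#include <cassert>

int main() {
  double X = -0.0, Y = +0.0;
  assert(X == Y);             // the compare is true...
  assert(1.0 / X != 1.0 / Y); // ...but X and Y differ: -inf vs. +inf
  return 0;
}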
+ if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && + CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; + + if (BinaryOperator::isNot(CondVal)) { + SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + return &SI; + } + + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp new file mode 100644 index 0000000..fe91da1 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -0,0 +1,427 @@ +//===- InstCombineShifts.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the visitShl, visitLShr, and visitAShr functions. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace PatternMatch; + +Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { + assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // shl X, 0 == X and shr X, 0 == X + // shl 0, X == 0 and shr 0, X == 0 + if (Op1 == Constant::getNullValue(Op1->getType()) || + Op0 == Constant::getNullValue(Op0->getType())) + return ReplaceInstUsesWith(I, Op0); + + if (isa<UndefValue>(Op0)) { + if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef + return ReplaceInstUsesWith(I, Op0); + else // undef << X -> 0, undef >>u X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + if (isa<UndefValue>(Op1)) { + if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X + return ReplaceInstUsesWith(I, Op0); + else // X << undef, X >>u undef -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + } + + // See if we can fold away this shift. + if (SimplifyDemandedInstructionBits(I)) + return &I; + + // Try to fold constant and into select arguments. + if (isa<Constant>(Op0)) + if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + + if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) + if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) + return Res; + return 0; +} + +Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + BinaryOperator &I) { + bool isLeftShift = I.getOpcode() == Instruction::Shl; + + // See if we can simplify any instructions used by the instruction whose sole + // purpose is to compute bits we don't care about. + uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); + + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate + // a signed shift. 
+ // + if (Op1->uge(TypeBits)) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); + else { + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); + return &I; + } + } + + // ((X*C1) << C2) == (X * (C1 << C2)) + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) + if (BO->getOpcode() == Instruction::Mul && isLeftShift) + if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) + return BinaryOperator::CreateMul(BO->getOperand(0), + ConstantExpr::getShl(BOOp, Op1)); + + // Try to fold constant and into select arguments. + if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + + // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) + if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { + Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); + // If 'shift2' is an ashr, we would have to get the sign bit into a funny + // place. Don't try to do this transformation in this case. Also, we + // require that the input operand is a shift-by-constant so that we have + // confidence that the shifts will get folded together. We could do this + // xform in more cases, but it is unlikely to be profitable. + if (TrOp && I.isLogicalShift() && TrOp->isShift() && + isa<ConstantInt>(TrOp->getOperand(1))) { + // Okay, we'll do this xform. Make the shift of shift. + Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + // (shift2 (shift1 & 0x00FF), c2) + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); + + // For logical shifts, the truncation has the effect of making the high + // part of the register be zeros. Emulate this by inserting an AND to + // clear the top bits as needed. This 'and' will usually be zapped by + // other xforms later if dead. + unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); + unsigned DstSize = TI->getType()->getScalarSizeInBits(); + APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); + + // The mask we constructed says what the trunc would do if occurring + // between the shifts. We want to know the effect *after* the second + // shift. We know that it is a logical shift by a constant, so adjust the + // mask as appropriate. + if (I.getOpcode() == Instruction::Shl) + MaskV <<= Op1->getZExtValue(); + else { + assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); + MaskV = MaskV.lshr(Op1->getZExtValue()); + } + + // shift1 & 0x00FF + Value *And = Builder->CreateAnd(NSh, + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); + + // Return the value truncated to the interesting size. + return new TruncInst(And, I.getType()); + } + } + + if (Op0->hasOneUse()) { + if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { + // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) + Value *V1, *V2; + ConstantInt *CC; + switch (Op0BO->getOpcode()) { + default: break; + case Instruction::Add: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // These operators commute. 
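A standalone check of the shift-through-trunc fold above, with i32 -> i8, shift1 = 3, shift2 = 2: performing both shifts in the wide type and applying the adjusted trunc mask matches trunc-then-shift for logical shifts.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000000; x += 97) {
    unsigned narrow = uint8_t(x >> 3) >> 2;          // shift2(trunc(shift1(x)))
    uint32_t wide = ((x >> 3) >> 2) & (0xFFu >> 2);  // 0xFF is the trunc mask, lshr'd by shift2
    assert(narrow == wide);
  }
  return 0;
}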
+ // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
+ match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
+ Op0BO->getOperand(1)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
+ Value *Op0BOOp1 = Op0BO->getOperand(1);
+ if (isLeftShift && Op0BOOp1->hasOneUse() &&
+ match(Op0BOOp1,
+ m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
+ m_ConstantInt(CC))) &&
+ cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(0), Op1,
+ Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
+ }
+ }
+
+ // FALL THROUGH.
+ case Instruction::Sub: {
+ // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
+ m_Specific(Op1)))) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // (X + (Y << C))
+ Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
+ Op0BO->getOperand(0)->getName());
+ uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
+ APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ }
+
+ // Turn (((X >> C)&CC) + Y) << C -> ((X & (CC << C)) + (Y << C))
+ if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
+ match(Op0BO->getOperand(0),
+ m_And(m_Shr(m_Value(V1), m_Value(V2)),
+ m_ConstantInt(CC))) && V2 == Op1 &&
+ cast<BinaryOperator>(Op0BO->getOperand(0))
+ ->getOperand(0)->hasOneUse()) {
+ Value *YS = // (Y << C)
+ Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+ V1->getName()+".mask");
+
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
+ }
+
+ break;
+ }
+ }
+
+
+ // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
+ bool isValid = true; // Valid only for And, Or, Xor
+ bool highBitSet = false; // Transform if high bit of constant set?
+
+ switch (Op0BO->getOpcode()) {
+ default: isValid = false; break; // Do not perform transform!
+ case Instruction::Add:
+ isValid = isLeftShift;
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ highBitSet = false;
+ break;
+ case Instruction::And:
+ highBitSet = true;
+ break;
+ }
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ // + if (isValid && I.getOpcode() == Instruction::AShr) + isValid = Op0C->getValue()[TypeBits-1] == highBitSet; + + if (isValid) { + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); + + Value *NewShift = + Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + NewShift->takeName(Op0BO); + + return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, + NewRHS); + } + } + } + } + + // Find out if this is a shift of a shift by a constant. + BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); + if (ShiftOp && !ShiftOp->isShift()) + ShiftOp = 0; + + if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { + ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); + uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); + assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); + if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. + Value *X = ShiftOp->getOperand(0); + + uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. + + const IntegerType *Ty = cast<IntegerType>(I.getType()); + + // Check for (X << c1) << c2 and (X >> c1) >> c2 + if (I.getOpcode() == ShiftOp->getOpcode()) { + // If this is oversized composite shift, then unsigned shifts get 0, ashr + // saturates. + if (AmtSum >= TypeBits) { + if (I.getOpcode() != Instruction::AShr) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. + } + + return BinaryOperator::Create(I.getOpcode(), X, + ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::LShr && + I.getOpcode() == Instruction::AShr) { + if (AmtSum >= TypeBits) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + + // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. + return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::AShr && + I.getOpcode() == Instruction::LShr) { + // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. + if (AmtSum >= TypeBits) + AmtSum = TypeBits-1; + + Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(), Mask)); + } + + // Okay, if we get here, one shift must be left, and the other shift must be + // right. See if the amounts are equal. + if (ShiftAmt1 == ShiftAmt2) { + // If we have ((X >>? C) << C), turn this into X & (-1 << C). + if (I.getOpcode() == Instruction::Shl) { + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(),Mask)); + } + // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). + if (I.getOpcode() == Instruction::LShr) { + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); + return BinaryOperator::CreateAnd(X, + ConstantInt::get(I.getContext(), Mask)); + } + } else if (ShiftAmt1 < ShiftAmt2) { + uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; + + // (X >>? 
C1) << C2 --> X << (C2-C1) & (-1 << C2) + if (I.getOpcode() == Instruction::Shl) { + assert(ShiftOp->getOpcode() == Instruction::LShr || + ShiftOp->getOpcode() == Instruction::AShr); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr) { + assert(ShiftOp->getOpcode() == Instruction::Shl); + Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. + } else { + assert(ShiftAmt2 < ShiftAmt1); + uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; + + // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) + if (I.getOpcode() == Instruction::Shl) { + assert(ShiftOp->getOpcode() == Instruction::LShr || + ShiftOp->getOpcode() == Instruction::AShr); + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, + ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) + if (I.getOpcode() == Instruction::LShr) { + assert(ShiftOp->getOpcode() == Instruction::Shl); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(I.getContext(),Mask)); + } + + // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. + } + } + return 0; +} + +Instruction *InstCombiner::visitShl(BinaryOperator &I) { + return commonShiftTransforms(I); +} + +Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + return commonShiftTransforms(I); +} + +Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) { + // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) + if (CSI->isAllOnesValue()) + return ReplaceInstUsesWith(I, CSI); + } + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we + // have a sign-extend idiom. If the input value is known to already be sign + // extended enough, delete the extension. + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + ComputeNumSignBits(X) > Op1C->getZExtValue()) + return ReplaceInstUsesWith(I, X); + } + + // See if we can turn a signed shr into an unsigned shr. + if (MaskedValueIsZero(Op0, + APInt::getSignBit(I.getType()->getScalarSizeInBits()))) + return BinaryOperator::CreateLShr(Op0, Op1); + + // Arithmetic shifting an all-sign-bit value is a no-op. 
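The shl/ashr sign-extension idiom handled in visitAShr, as a standalone scalar sketch (this assumes two's-complement conversions and an arithmetic right shift for signed values, which C++20 guarantees and earlier compilers commonly provide):

#include <cassert>
#include <cstdint>

// (x << 8) >>s 8 sign-extends the low 24 bits of x.
int32_t sext_lo24(int32_t x) { return int32_t(uint32_t(x) << 8) >> 8; }

int main() {
  assert(sext_lo24(0x00800000) == int32_t(0xFF800000u)); // bit 23 smears upward
  assert(sext_lo24(-5) == -5); // already sign-extended, so the shl/ashr pair is a no-op
  return 0;
}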
+ unsigned NumSignBits = ComputeNumSignBits(Op0); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return ReplaceInstUsesWith(I, Op0); + + return 0; +} + diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp new file mode 100644 index 0000000..74a1b68 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -0,0 +1,1106 @@ +//===- InstCombineSimplifyDemanded.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains logic for simplifying instructions based on information +// about how they are used. +// +//===----------------------------------------------------------------------===// + + +#include "InstCombine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/IntrinsicInst.h" + +using namespace llvm; + + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. +static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, + APInt Demanded) { + assert(I && "No instruction?"); + assert(OpNo < I->getNumOperands() && "Operand index too large"); + + // If the operand is not a constant integer, nothing to do. + ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo)); + if (!OpC) return false; + + // If there are no bits set that aren't demanded, nothing to do. + Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); + if ((~Demanded & OpC->getValue()) == 0) + return false; + + // This instruction is producing bits that are not demanded. Shrink the RHS. + Demanded &= OpC->getValue(); + I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); + return true; +} + + + +/// SimplifyDemandedInstructionBits - Inst is an integer instruction that +/// SimplifyDemandedBits knows about. See if the instruction has any +/// properties that allow us to simplify its operands. +bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { + unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); + + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, + KnownZero, KnownOne, 0); + if (V == 0) return false; + if (V == &Inst) return true; + ReplaceInstUsesWith(Inst, V); + return true; +} + +/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the +/// specified instruction operand if possible, updating it in place. It returns +/// true if it made any change and false otherwise. +bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) { + Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, + KnownZero, KnownOne, Depth); + if (NewVal == 0) return false; + U = NewVal; + return true; +} + + +/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler +/// value based on the demanded bits. When this function is called, it is known +/// that only the bits set in DemandedMask of the result of V are ever used +/// downstream. 
Consequently, depending on the mask and V, it may be possible
+/// to replace V with a constant or one of its operands. In such cases, this
+/// function does the replacement. In all other cases, it analyzes the
+/// expression and sets KnownOne to all the bits that are known to be one in
+/// the expression, and KnownZero to all the bits that are known to be zero in
+/// the expression. These are provided to potentially allow the
+/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
+/// the expression. KnownOne and KnownZero always follow the invariant that
+/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
+/// the bits in KnownOne and KnownZero may only be accurate for those bits set
+/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
+/// and KnownOne must all be the same.
+///
+/// This returns null if it did not change anything and it permits no
+/// simplification. This returns V itself if it did some simplification of V's
+/// operands based on the information about what bits are demanded. This returns
+/// some other non-null value if it found out that V is equal to another value
+/// in the context where the specified bits are demanded, but not for all users.
+Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth) {
+ assert(V != 0 && "Null pointer of Value???");
+ assert(Depth <= 6 && "Limit Search Depth");
+ uint32_t BitWidth = DemandedMask.getBitWidth();
+ const Type *VTy = V->getType();
+ assert((TD || !isa<PointerType>(VTy)) &&
+ "SimplifyDemandedBits needs to know bit widths!");
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVector() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne = CI->getValue() & DemandedMask;
+ KnownZero = ~KnownOne & DemandedMask;
+ return 0;
+ }
+ if (isa<ConstantPointerNull>(V)) {
+ // We know all of the bits for a constant!
+ KnownOne.clear();
+ KnownZero = DemandedMask;
+ return 0;
+ }
+
+ KnownZero.clear();
+ KnownOne.clear();
+ if (DemandedMask == 0) { // Not demanding any bits from V.
+ if (isa<UndefValue>(V))
+ return 0;
+ return UndefValue::get(VTy);
+ }
+
+ if (Depth == 6) // Limit search depth.
+ return 0;
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ return 0; // Only analyze instructions.
+ }
+
+ // If there are multiple uses of this value and we aren't at the root, then
+ // we can't do any simplifications of the operands, because DemandedMask
+ // only reflects the bits demanded by *one* of the users.
+ if (Depth != 0 && !I->hasOneUse()) {
+ // Despite the fact that we can't simplify this instruction in every user's
+ // context, we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ if (I->getOpcode() == Instruction::And) {
+ // If either the LHS or the RHS are Zero, the result is zero.
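Before the per-opcode cases, a standalone check of the ShrinkDemandedConstant idea defined earlier in this file: if only the low 8 bits of (x & 0x1FF) are demanded, the constant can shrink to 0x1FF & 0xFF == 0xFF without changing any demanded bit.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 4096; ++x)
    assert(((x & 0x1FFu) & 0xFFu) == ((x & 0xFFu) & 0xFFu));
  return 0;
}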
+ ComputeMaskedBits(I->getOperand(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + } else if (I->getOpcode() == Instruction::Or) { + // We can simplify (X|Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + // If either the LHS or the RHS are One, the result is One. + ComputeMaskedBits(I->getOperand(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + } + + // Compute the KnownZero/KnownOne bits to simplify things downstream. + ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); + return 0; + } + + // If this is the root being simplified, allow it to have multiple uses, + // just set the DemandedMask to all bits so that we can try to simplify the + // operands. This allows visitTruncInst (for example) to simplify the + // operand of a trunc without duplicating all the logic below. + if (Depth == 0 && !V->hasOneUse()) + DemandedMask = APInt::getAllOnesValue(BitWidth); + + switch (I->getOpcode()) { + default: + ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; + case Instruction::And: + // If either the LHS or the RHS are Zero, the result is zero. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + (DemandedMask & ~LHSKnownZero)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + (DemandedMask & ~RHSKnownZero)) + return I->getOperand(1); + + // If all of the demanded bits in the inputs are known zeros, return zero. 
+ if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) + return Constant::getNullValue(VTy); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + return I; + + // Output known-1 bits are only known if set in both the LHS & RHS. + RHSKnownOne &= LHSKnownOne; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + RHSKnownZero |= LHSKnownZero; + break; + case Instruction::Or: + // If either the LHS or the RHS are One, the result is One. + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + (DemandedMask & ~LHSKnownOne)) + return I->getOperand(0); + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + (DemandedMask & ~RHSKnownOne)) + return I->getOperand(1); + + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + (DemandedMask & (~RHSKnownZero))) + return I->getOperand(0); + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + (DemandedMask & (~LHSKnownZero))) + return I->getOperand(1); + + // If the RHS is a constant, see if we can simplify it. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + RHSKnownZero &= LHSKnownZero; + // Output known-1 are known to be set if set in either the LHS | RHS. + RHSKnownOne |= LHSKnownOne; + break; + case Instruction::Xor: { + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | + (RHSKnownOne & LHSKnownOne); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | + (RHSKnownOne & LHSKnownZero); + + // If all of the demanded bits are known to be zero on one side or the + // other, turn this into an *inclusive* or. + // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + Instruction *Or = + BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), + I->getName()); + return InsertNewInstBefore(Or, *I); + } + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { + // all known + if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); + Instruction *And = + BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + return InsertNewInstBefore(And, *I); + } + } + + // If the RHS is a constant, see if we can simplify it. + // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + + // If our LHS is an 'and' and if it has one use, and if any of the bits we + // are flipping are known to be set, then the xor is just resetting those + // bits to zero. We can just knock out bits from the 'and' and the 'xor', + // simplifying both of them. + if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) + if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && + isa<ConstantInt>(I->getOperand(1)) && + isa<ConstantInt>(LHSInst->getOperand(1)) && + (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { + ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); + ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); + APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); + + Constant *AndC = + ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); + Instruction *NewAnd = + BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + InsertNewInstBefore(NewAnd, *I); + + Constant *XorC = + ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); + Instruction *NewXor = + BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); + return InsertNewInstBefore(NewXor, *I); + } + + + RHSKnownZero = KnownZeroOut; + RHSKnownOne = KnownOneOut; + break; + } + case Instruction::Select: + if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, + RHSKnownZero, RHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (ShrinkDemandedConstant(I, 1, DemandedMask) || + ShrinkDemandedConstant(I, 2, DemandedMask)) + return I; + + // Only known if known in both the LHS and RHS. 
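A standalone check of the xor -> or rewrite above: when the known-zero masks guarantee that no bit can be set in both operands (C1 & C2 == 0), xor and or agree on every input.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b) {
      uint32_t lhs = a & 0xF0u, rhs = b & 0x0Fu; // disjoint: 0xF0 & 0x0F == 0
      assert((lhs ^ rhs) == (lhs | rhs));
    }
  return 0;
}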
+ RHSKnownOne &= LHSKnownOne;
+ RHSKnownZero &= LHSKnownZero;
+ break;
+ case Instruction::Trunc: {
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
+ DemandedMask.zext(truncBf);
+ RHSKnownZero.zext(truncBf);
+ RHSKnownOne.zext(truncBf);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ DemandedMask.trunc(BitWidth);
+ RHSKnownZero.trunc(BitWidth);
+ RHSKnownOne.trunc(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ break;
+ }
+ case Instruction::BitCast:
+ if (!I->getOperand(0)->getType()->isIntOrIntVector())
+ return 0; // vector->int or fp->int?
+
+ if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
+ if (const VectorType *SrcVTy =
+ dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (DstVTy->getNumElements() != SrcVTy->getNumElements())
+ // Don't touch a bitcast between vectors of different element counts.
+ return 0;
+ } else
+ // Don't touch a scalar-to-vector bitcast.
+ return 0;
+ } else if (isa<VectorType>(I->getOperand(0)->getType()))
+ // Don't touch a vector-to-scalar bitcast.
+ return 0;
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ break;
+ case Instruction::ZExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ DemandedMask.trunc(SrcBitWidth);
+ RHSKnownZero.trunc(SrcBitWidth);
+ RHSKnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ DemandedMask.zext(BitWidth);
+ RHSKnownZero.zext(BitWidth);
+ RHSKnownOne.zext(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ // The top bits are known to be zero.
+ RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ break;
+ }
+ case Instruction::SExt: {
+ // Compute the bits in the result that are not present in the input.
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
+
+ APInt InputDemandedBits = DemandedMask &
+ APInt::getLowBitsSet(BitWidth, SrcBitWidth);
+
+ APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ // If any of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ if ((NewBits & DemandedMask) != 0)
+ InputDemandedBits.set(SrcBitWidth-1);
+
+ InputDemandedBits.trunc(SrcBitWidth);
+ RHSKnownZero.trunc(SrcBitWidth);
+ RHSKnownOne.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
+ RHSKnownZero, RHSKnownOne, Depth+1))
+ return I;
+ InputDemandedBits.zext(BitWidth);
+ RHSKnownZero.zext(BitWidth);
+ RHSKnownOne.zext(BitWidth);
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, or if the NewBits are not demanded,
+ // convert this into a zero extension.
+ if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ // Convert to ZExt cast
+ CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
+ return InsertNewInstBefore(NewCast, *I);
+ } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set
+ RHSKnownOne |= NewBits;
+ }
+ break;
+ }
+ case Instruction::Add: {
+ // Figure out what the input bits are. If the top bits of the add result
+ // are not demanded, then the add doesn't demand them from its input
+ // either.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+
+ // If there is a constant on the RHS, there are a variety of xformations
+ // we can do.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // If null, this should be simplified elsewhere. Some of the xforms here
+ // won't work if the RHS is zero.
+ if (RHS->isZero())
+ break;
+
+ // If the top bit of the output is demanded, demand everything from the
+ // input. Otherwise, we demand all the input bits except NLZ top bits.
+ APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
+
+ // Find information about known zero/one bits in the input.
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+
+ // If the RHS of the add has bits set that can't affect the input, reduce
+ // the constant.
+ if (ShrinkDemandedConstant(I, 1, InDemandedBits))
+ return I;
+
+ // Avoid excess work.
+ if (LHSKnownZero == 0 && LHSKnownOne == 0)
+ break;
+
+ // Turn it into OR if input bits are zero.
+ if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
+ Instruction *Or =
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
+ I->getName());
+ return InsertNewInstBefore(Or, *I);
+ }
+
+ // We can say something about the output known-zero and known-one bits,
+ // depending on potential carries from the input constant and the
+ // unknowns. For example if the LHS is known to have at most the 0x0F0F0
+ // bits set and the RHS constant is 0x01001, then we know we have a known
+ // one mask of 0x00001 and a known zero mask of 0xE0F0E.
+
+ // To compute this, we first compute the potential carry bits. These are
+ // the bits which may be modified. I'm not aware of a better way to do
+ // this scan.
+ const APInt &RHSVal = RHS->getValue();
+ APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
+
+ // Now that we know which bits have carries, compute the known-1/0 sets.
+
+ // Bits are known one if they are known zero in one operand and one in the
+ // other, and there is no input carry.
+ RHSKnownOne = ((LHSKnownZero & RHSVal) |
+ (LHSKnownOne & ~RHSVal)) & ~CarryBits;
+
+ // Bits are known zero if they are known zero in both operands and there
+ // is no input carry.
+ RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
+ } else {
+ // If the high-bits of this ADD are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
+ if (DemandedMask[BitWidth-1] == 0) {
+ // Right fill the mask of bits for this ADD to demand the most
+ // significant bit and all those below it.
+ APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return I;
+ }
+ }
+ break;
+ }
+ case Instruction::Sub:
+ // If the high-bits of this SUB are not demanded, then it does not demand
+ // the high bits of its LHS or RHS.
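The rule shared by the Add case above and the Sub case below, checked standalone: when only the low 16 bits of a sum or difference are demanded, the high bits of both inputs are irrelevant, because modular arithmetic truncates cleanly.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0xDEAD1234u, b = 0xBEEF0042u;
  assert(uint16_t(a + b) == uint16_t((a & 0xFFFFu) + (b & 0xFFFFu)));
  assert(uint16_t(a - b) == uint16_t((a & 0xFFFFu) - (b & 0xFFFFu)));
  return 0;
}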
+ if (DemandedMask[BitWidth-1] == 0) { + // Right fill the mask of bits for this SUB to demand the most + // significant bit and all those below it. + uint32_t NLZ = DemandedMask.countLeadingZeros(); + APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, + LHSKnownZero, LHSKnownOne, Depth+1)) + return I; + } + // Otherwise just hand the sub off to ComputeMaskedBits to fill in + // the known zeros and ones. + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; + case Instruction::Shl: + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + RHSKnownZero <<= ShiftAmt; + RHSKnownOne <<= ShiftAmt; + // low bits known zero. + if (ShiftAmt) + RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::LShr: + // For a logical shift right + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Unsigned shift right. + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); + RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + if (ShiftAmt) { + // Compute the new bits that are at the top now. + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + RHSKnownZero |= HighBits; // high bits known zero. + } + } + break; + case Instruction::AShr: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) { + // Perform the logical shift right. + Instruction *NewVal = BinaryOperator::CreateLShr( + I->getOperand(0), I->getOperand(1), I->getName()); + return InsertNewInstBefore(NewVal, *I); + } + + // If the sign bit is the only bit demanded by this ashr, then there is no + // need to do it, the shift doesn't change the high bit. + if (DemandedMask.isSignBit()) + return I->getOperand(0); + + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Signed shift right. + APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); + // If any of the "high bits" are demanded, we should set the sign bit as + // demanded. + if (DemandedMask.countLeadingZeros() <= ShiftAmt) + DemandedMaskIn.set(BitWidth-1); + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + RHSKnownZero, RHSKnownOne, Depth+1)) + return I; + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + // Compute the new bits that are at the top now. 
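+      // (HighBits is the set of positions the shift fills in; whether they
+      // become zeros or sign copies decides if an lshr is equivalent.)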
+      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+      RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt);
+      RHSKnownOne  = APIntOps::lshr(RHSKnownOne, ShiftAmt);
+
+      // Handle the sign bits.
+      APInt SignBit(APInt::getSignBit(BitWidth));
+      // Adjust to where it is now in the mask.
+      SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
+      // If the input sign bit is known to be zero, or if none of the top bits
+      // are demanded, turn this into an unsigned shift right.
+      if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
+          (HighBits & ~DemandedMask) == HighBits) {
+        // Perform the logical shift right.
+        Instruction *NewVal = BinaryOperator::CreateLShr(
+                          I->getOperand(0), SA, I->getName());
+        return InsertNewInstBefore(NewVal, *I);
+      } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one.
+        RHSKnownOne |= HighBits;
+      }
+    }
+    break;
+  case Instruction::SRem:
+    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      APInt RA = Rem->getValue().abs();
+      if (RA.isPowerOf2()) {
+        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
+          return I->getOperand(0);
+
+        APInt LowBits = RA - 1;
+        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+        if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
+                                 LHSKnownZero, LHSKnownOne, Depth+1))
+          return I;
+
+        if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+          LHSKnownZero |= ~LowBits;
+
+        RHSKnownZero |= LHSKnownZero & DemandedMask;
+
+        assert(!(RHSKnownZero & RHSKnownOne) &&
+               "Bits known to be one AND zero?");
+      }
+    }
+    break;
+  case Instruction::URem: {
+    APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
+                             LHSKnownZero, LHSKnownOne, Depth+1) ||
+        SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
+                             KnownZero2, KnownOne2, Depth+1))
+      return I;
+
+    // The result of urem is never larger than either operand, so it has at
+    // least as many leading zeros as either operand is known to have.
+    unsigned Leaders = std::max(LHSKnownZero.countLeadingOnes(),
+                                KnownZero2.countLeadingOnes());
+    RHSKnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+    break;
+  }
+  case Instruction::Call:
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::bswap: {
+        // If the only bits demanded come from one byte of the bswap result,
+        // just shift the input byte into position to eliminate the bswap.
+        unsigned NLZ = DemandedMask.countLeadingZeros();
+        unsigned NTZ = DemandedMask.countTrailingZeros();
+
+        // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
+        // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
+        // have 14 leading zeros, round to 8.
+        NLZ &= ~7;
+        NTZ &= ~7;
+        // If we need exactly one byte, we can do this transformation.
+        if (BitWidth-NLZ-NTZ == 8) {
+          unsigned ResultBit = NTZ;
+          unsigned InputBit = BitWidth-NTZ-8;
+
+          // Replace this with either a left or right shift to get the byte
+          // into the right place.
+          Instruction *NewVal;
+          if (InputBit > ResultBit)
+            NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+                    ConstantInt::get(I->getType(), InputBit-ResultBit));
+          else
+            NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+                    ConstantInt::get(I->getType(), ResultBit-InputBit));
+          NewVal->takeName(I);
+          return InsertNewInstBefore(NewVal, *I);
+        }
+
+        // TODO: Could compute known zero/one bits based on the input.
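+        // Worked example on i32: if only result bits 8-15 are demanded,
+        // NLZ rounds to 16 and NTZ to 8, so BitWidth-NLZ-NTZ == 8.  That
+        // byte comes from input bits 16-23 (InputBit=16, ResultBit=8), so
+        // a single lshr by 8 replaces the whole bswap.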
+        break;
+      }
+      }
+    }
+    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+    break;
+  }
+
+  // If the client is only demanding bits that we know, return the known
+  // constant.
+  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask)
+    return Constant::getIntegerValue(VTy, RHSKnownOne);
+  return 0;
+}
+
+
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
+/// any number of elements. DemandedElts contains the set of elements that are
+/// actually used by the caller.  This method analyzes which elements of the
+/// operand are undef and returns that information in UndefElts.
+///
+/// If the information about demanded elements can be used to simplify the
+/// operation, the operation is simplified and the resultant value is
+/// returned.  This returns null if no change was made.
+Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
+                                                APInt& UndefElts,
+                                                unsigned Depth) {
+  unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+  APInt EltMask(APInt::getAllOnesValue(VWidth));
+  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
+
+  if (isa<UndefValue>(V)) {
+    // If the entire vector is undefined, just return this info.
+    UndefElts = EltMask;
+    return 0;
+  } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+    UndefElts = EltMask;
+    return UndefValue::get(V->getType());
+  }
+
+  UndefElts = 0;
+  if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
+    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+    Constant *Undef = UndefValue::get(EltTy);
+
+    std::vector<Constant*> Elts;
+    for (unsigned i = 0; i != VWidth; ++i)
+      if (!DemandedElts[i]) {   // If not demanded, set to undef.
+        Elts.push_back(Undef);
+        UndefElts.set(i);
+      } else if (isa<UndefValue>(CP->getOperand(i))) {   // Already undef.
+        Elts.push_back(Undef);
+        UndefElts.set(i);
+      } else {                               // Otherwise, defined.
+        Elts.push_back(CP->getOperand(i));
+      }
+
+    // If we changed the constant, return it.
+    Constant *NewCP = ConstantVector::get(Elts);
+    return NewCP != CP ? NewCP : 0;
+  } else if (isa<ConstantAggregateZero>(V)) {
+    // Simplify the CAZ to a ConstantVector where the non-demanded elements
+    // are set to undef.
+
+    // Check if this is identity.  If so, return 0 since we are not
+    // simplifying anything.
+    if (DemandedElts == ((1ULL << VWidth) - 1))
+      return 0;
+
+    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
+    Constant *Zero = Constant::getNullValue(EltTy);
+    Constant *Undef = UndefValue::get(EltTy);
+    std::vector<Constant*> Elts;
+    for (unsigned i = 0; i != VWidth; ++i) {
+      Constant *Elt = DemandedElts[i] ? Zero : Undef;
+      Elts.push_back(Elt);
+    }
+    UndefElts = DemandedElts ^ EltMask;
+    return ConstantVector::get(Elts);
+  }
+
+  // Limit search depth.
+  if (Depth == 10)
+    return 0;
+
+  // If multiple users are using the root value, proceed with
+  // simplification conservatively assuming that all elements
+  // are needed.
+  if (!V->hasOneUse()) {
+    // Quit if we find multiple users of a non-root value though.
+    // They'll be handled when it's their turn to be visited by
+    // the main instcombine process.
+    if (Depth != 0)
+      // TODO: Just compute the UndefElts information recursively.
+      return 0;
+
+    // Conservatively assume that all elements are needed.
+    DemandedElts = EltMask;
+  }
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return 0;        // Only analyze instructions.
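+  // An illustration of what the switch below does: given
+  //   %I = insertelement <4 x float> %v, float %f, i32 3
+  // with only element 0 demanded, the insert is dead and %v is used
+  // directly; a shufflevector likewise only demands the source elements
+  // that its mask actually selects.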
+ + bool MadeChange = false; + APInt UndefElts2(VWidth, 0); + Value *TmpV; + switch (I->getOpcode()) { + default: break; + + case Instruction::InsertElement: { + // If this is a variable index, we don't know which element it overwrites. + // demand exactly the same input as we produce. + ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2)); + if (Idx == 0) { + // Note that we can't propagate undef elt info, because we don't know + // which elt is getting updated. + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; + } + + // If this is inserting an element that isn't demanded, remove this + // insertelement. + unsigned IdxNo = Idx->getZExtValue(); + if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { + Worklist.Add(I); + return I->getOperand(0); + } + + // Otherwise, the element inserted overwrites whatever was there, so the + // input demanded set is simpler than the output set. + APInt DemandedElts2 = DemandedElts; + DemandedElts2.clear(IdxNo); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + + // The inserted element is defined. + UndefElts.clear(IdxNo); + break; + } + case Instruction::ShuffleVector: { + ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); + uint64_t LHSVWidth = + cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements(); + APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0); + for (unsigned i = 0; i < VWidth; i++) { + if (DemandedElts[i]) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal != -1u) { + assert(MaskVal < LHSVWidth * 2 && + "shufflevector mask index out of range!"); + if (MaskVal < LHSVWidth) + LeftDemanded.set(MaskVal); + else + RightDemanded.set(MaskVal - LHSVWidth); + } + } + } + + APInt UndefElts4(LHSVWidth, 0); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, + UndefElts4, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + + APInt UndefElts3(LHSVWidth, 0); + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, + UndefElts3, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + bool NewUndefElts = false; + for (unsigned i = 0; i < VWidth; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal == -1u) { + UndefElts.set(i); + } else if (MaskVal < LHSVWidth) { + if (UndefElts4[MaskVal]) { + NewUndefElts = true; + UndefElts.set(i); + } + } else { + if (UndefElts3[MaskVal - LHSVWidth]) { + NewUndefElts = true; + UndefElts.set(i); + } + } + } + + if (NewUndefElts) { + // Add additional discovered undefs. + std::vector<Constant*> Elts; + for (unsigned i = 0; i < VWidth; ++i) { + if (UndefElts[i]) + Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext()))); + else + Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()), + Shuffle->getMaskValue(i))); + } + I->setOperand(2, ConstantVector::get(Elts)); + MadeChange = true; + } + break; + } + case Instruction::BitCast: { + // Vector->vector casts only. + const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); + if (!VTy) break; + unsigned InVWidth = VTy->getNumElements(); + APInt InputDemandedElts(InVWidth, 0); + unsigned Ratio; + + if (VWidth == InVWidth) { + // If we are converting from <4 x i32> -> <4 x f32>, we demand the same + // elements as are demanded of us. 
+    Ratio = 1;
+    InputDemandedElts = DemandedElts;
+  } else if (VWidth > InVWidth) {
+    // Untested so far.
+    break;
+
+    // If there are more elements in the result than there are in the source,
+    // then an input element is live if any of the corresponding output
+    // elements are live.
+    Ratio = VWidth/InVWidth;
+    for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+      if (DemandedElts[OutIdx])
+        InputDemandedElts.set(OutIdx/Ratio);
+    }
+  } else {
+    // Untested so far.
+    break;
+
+    // If there are more elements in the source than there are in the result,
+    // then an input element is live if the corresponding output element is
+    // live.
+    Ratio = InVWidth/VWidth;
+    for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+      if (DemandedElts[InIdx/Ratio])
+        InputDemandedElts.set(InIdx);
+  }
+
+  // Simplify the bitcast's operand based on the elements demanded of it.
+  TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+                                    UndefElts2, Depth+1);
+  if (TmpV) {
+    I->setOperand(0, TmpV);
+    MadeChange = true;
+  }
+
+  UndefElts = UndefElts2;
+  if (VWidth > InVWidth) {
+    llvm_unreachable("Unimp");
+    // If there are more elements in the result than there are in the source,
+    // then an output element is undef if the corresponding input element is
+    // undef.
+    for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+      if (UndefElts2[OutIdx/Ratio])
+        UndefElts.set(OutIdx);
+  } else if (VWidth < InVWidth) {
+    llvm_unreachable("Unimp");
+    // If there are more elements in the source than there are in the result,
+    // then a result element is undef if all of the corresponding input
+    // elements are undef.
+    UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
+    for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+      if (!UndefElts2[InIdx])          // Not undef?
+        UndefElts.clear(InIdx/Ratio);  // Clear undef bit.
+  }
+  break;
+  }
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+    // These ops compute each result element only from the corresponding
+    // operand elements, so they demand exactly the elements demanded of
+    // the result.
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+                                      UndefElts, Depth+1);
+    if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
+                                      UndefElts2, Depth+1);
+    if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+    // Output elements are undefined if both are undefined.  Consider things
+    // like undef&0.  The result is known zero, not undef.
+    UndefElts &= UndefElts2;
+    break;
+
+  case Instruction::Call: {
+    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+    if (!II) break;
+    switch (II->getIntrinsicID()) {
+    default: break;
+
+    // Binary vector operations that work column-wise.  A dest element is a
+    // function of the corresponding input elements from the two inputs.
+    case Intrinsic::x86_sse_sub_ss:
+    case Intrinsic::x86_sse_mul_ss:
+    case Intrinsic::x86_sse_min_ss:
+    case Intrinsic::x86_sse_max_ss:
+    case Intrinsic::x86_sse2_sub_sd:
+    case Intrinsic::x86_sse2_mul_sd:
+    case Intrinsic::x86_sse2_min_sd:
+    case Intrinsic::x86_sse2_max_sd:
+      TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+                                        UndefElts, Depth+1);
+      if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
+      TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+                                        UndefElts2, Depth+1);
+      if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+
+      // If only the low elt is demanded and this is a scalarizable intrinsic,
+      // scalarize it now.
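+      // Illustrative IR: with only element 0 demanded,
+      //   %r = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a,
+      //                                              <4 x float> %b)
+      // becomes an extract of element 0 from each operand, a scalar fsub,
+      // and an insert of the result into an undef vector, as below.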
+ if (DemandedElts == 1) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse2_sub_sd: + case Intrinsic::x86_sse2_mul_sd: + // TODO: Lower MIN/MAX/ABS/etc + Value *LHS = II->getOperand(1); + Value *RHS = II->getOperand(2); + // Extract the element as scalars. + LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); + RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); + + switch (II->getIntrinsicID()) { + default: llvm_unreachable("Case stmts out of sync!"); + case Intrinsic::x86_sse_sub_ss: + case Intrinsic::x86_sse2_sub_sd: + TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, + II->getName()), *II); + break; + case Intrinsic::x86_sse_mul_ss: + case Intrinsic::x86_sse2_mul_sd: + TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS, + II->getName()), *II); + break; + } + + Instruction *New = + InsertElementInst::Create( + UndefValue::get(II->getType()), TmpV, + ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false), + II->getName()); + InsertNewInstBefore(New, *II); + return New; + } + } + + // Output elements are undefined if both are undefined. Consider things + // like undef&0. The result is known zero, not undef. + UndefElts &= UndefElts2; + break; + } + break; + } + } + return MadeChange ? I : 0; +} diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp new file mode 100644 index 0000000..f11f557 --- /dev/null +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -0,0 +1,560 @@ +//===- InstCombineVectorOps.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements instcombine for ExtractElement, InsertElement and +// ShuffleVector. +// +//===----------------------------------------------------------------------===// + +#include "InstCombine.h" +using namespace llvm; + +/// CheapToScalarize - Return true if the value is cheaper to scalarize than it +/// is to leave as a vector operation. +static bool CheapToScalarize(Value *V, bool isConstant) { + if (isa<ConstantAggregateZero>(V)) + return true; + if (ConstantVector *C = dyn_cast<ConstantVector>(V)) { + if (isConstant) return true; + // If all elts are the same, we can extract. + Constant *Op0 = C->getOperand(0); + for (unsigned i = 1; i < C->getNumOperands(); ++i) + if (C->getOperand(i) != Op0) + return false; + return true; + } + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return false; + + // Insert element gets simplified to the inserted element or is deleted if + // this is constant idx extract element and its a constant idx insertelt. 
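+  // e.g. extracting constant index 2 out of (insertelement %v, %s, i32 2)
+  // simplifies to %s, leaving no vector operation behind.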
+ if (I->getOpcode() == Instruction::InsertElement && isConstant && + isa<ConstantInt>(I->getOperand(2))) + return true; + if (I->getOpcode() == Instruction::Load && I->hasOneUse()) + return true; + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) + if (BO->hasOneUse() && + (CheapToScalarize(BO->getOperand(0), isConstant) || + CheapToScalarize(BO->getOperand(1), isConstant))) + return true; + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (CI->hasOneUse() && + (CheapToScalarize(CI->getOperand(0), isConstant) || + CheapToScalarize(CI->getOperand(1), isConstant))) + return true; + + return false; +} + +/// Read and decode a shufflevector mask. +/// +/// It turns undef elements into values that are larger than the number of +/// elements in the input. +static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { + unsigned NElts = SVI->getType()->getNumElements(); + if (isa<ConstantAggregateZero>(SVI->getOperand(2))) + return std::vector<unsigned>(NElts, 0); + if (isa<UndefValue>(SVI->getOperand(2))) + return std::vector<unsigned>(NElts, 2*NElts); + + std::vector<unsigned> Result; + const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2)); + for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) + if (isa<UndefValue>(*i)) + Result.push_back(NElts*2); // undef -> 8 + else + Result.push_back(cast<ConstantInt>(*i)->getZExtValue()); + return Result; +} + +/// FindScalarElement - Given a vector and an element number, see if the scalar +/// value is already around as a register, for example if it were inserted then +/// extracted from the vector. +static Value *FindScalarElement(Value *V, unsigned EltNo) { + assert(isa<VectorType>(V->getType()) && "Not looking at a vector?"); + const VectorType *PTy = cast<VectorType>(V->getType()); + unsigned Width = PTy->getNumElements(); + if (EltNo >= Width) // Out of range access. + return UndefValue::get(PTy->getElementType()); + + if (isa<UndefValue>(V)) + return UndefValue::get(PTy->getElementType()); + if (isa<ConstantAggregateZero>(V)) + return Constant::getNullValue(PTy->getElementType()); + if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) + return CP->getOperand(EltNo); + + if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { + // If this is an insert to a variable element, we don't know what it is. + if (!isa<ConstantInt>(III->getOperand(2))) + return 0; + unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); + + // If this is an insert to the element we are looking for, return the + // inserted value. + if (EltNo == IIElt) + return III->getOperand(1); + + // Otherwise, the insertelement doesn't modify the value, recurse on its + // vector input. + return FindScalarElement(III->getOperand(0), EltNo); + } + + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { + unsigned LHSWidth = + cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + unsigned InEl = getShuffleMask(SVI)[EltNo]; + if (InEl < LHSWidth) + return FindScalarElement(SVI->getOperand(0), InEl); + else if (InEl < LHSWidth*2) + return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); + else + return UndefValue::get(PTy->getElementType()); + } + + // Otherwise, we don't know. + return 0; +} + +Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { + // If vector val is undef, replace extract with scalar undef. 
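+  // (extractelement undef, %idx) is undef for any index value.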
+  if (isa<UndefValue>(EI.getOperand(0)))
+    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+  // If vector val is constant 0, replace extract with scalar 0.
+  if (isa<ConstantAggregateZero>(EI.getOperand(0)))
+    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
+
+  if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
+    // If vector val is constant with all elements the same, replace EI with
+    // that element.  When the elements are not identical, we cannot replace
+    // yet (we do that below, but only when the index is constant).
+    Constant *op0 = C->getOperand(0);
+    for (unsigned i = 1; i != C->getNumOperands(); ++i)
+      if (C->getOperand(i) != op0) {
+        op0 = 0;
+        break;
+      }
+    if (op0)
+      return ReplaceInstUsesWith(EI, op0);
+  }
+
+  // If extracting a specified index from the vector, see if we can recursively
+  // find a previously computed scalar that was inserted into the vector.
+  if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+    unsigned IndexVal = IdxC->getZExtValue();
+    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
+
+    // If this is extracting an invalid index, turn this into undef, to avoid
+    // crashing the code below.
+    if (IndexVal >= VectorWidth)
+      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+
+    // This instruction only demands the single element from the input vector.
+    // If the input vector has a single use, simplify it based on this use
+    // property.
+    if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
+      APInt UndefElts(VectorWidth, 0);
+      APInt DemandedMask(VectorWidth, 0);
+      DemandedMask.set(IndexVal);   // Only the IndexVal'th element is used.
+      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
+                                                DemandedMask, UndefElts)) {
+        EI.setOperand(0, V);
+        return &EI;
+      }
+    }
+
+    if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
+      return ReplaceInstUsesWith(EI, Elt);
+
+    // If this extractelement is directly using a bitcast from a vector of
+    // the same number of elements, see if we can find the source element from
+    // it.  In this case, we will end up needing to bitcast the scalars.
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+      if (const VectorType *VT =
+              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+        if (VT->getNumElements() == VectorWidth)
+          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+            return new BitCastInst(Elt, EI.getType());
+    }
+  }
+
+  if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
+    // Push extractelement into predecessor operation if legal and
+    // profitable to do so.
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+      if (I->hasOneUse() &&
+          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+        Value *newEI0 =
+          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+                                        EI.getName()+".lhs");
+        Value *newEI1 =
+          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+                                        EI.getName()+".rhs");
+        return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
+      }
+    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
+      // Extracting the inserted element?
+      if (IE->getOperand(2) == EI.getOperand(1))
+        return ReplaceInstUsesWith(EI, IE->getOperand(1));
+      // If the inserted and extracted elements are constants, they must not
+      // be the same value, extract from the pre-inserted value instead.
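+      // e.g. extracting index 0 out of (insertelement %v, %s, i32 1): the
+      // insert cannot affect element 0, so extract directly from %v.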
+      if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
+        Worklist.AddValue(EI.getOperand(0));
+        EI.setOperand(0, IE->getOperand(0));
+        return &EI;
+      }
+    } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
+      // If this is extracting an element from a shufflevector, figure out
+      // where it came from and extract from the appropriate input element
+      // instead.
+      if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+        unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+        Value *Src;
+        unsigned LHSWidth =
+          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+        if (SrcIdx < LHSWidth)
+          Src = SVI->getOperand(0);
+        else if (SrcIdx < LHSWidth*2) {
+          SrcIdx -= LHSWidth;
+          Src = SVI->getOperand(1);
+        } else {
+          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+        }
+        return ExtractElementInst::Create(Src,
+                  ConstantInt::get(Type::getInt32Ty(EI.getContext()),
+                                   SrcIdx, false));
+      }
+    }
+    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
+  }
+  return 0;
+}
+
+/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
+/// elements from either LHS or RHS, return the shuffle mask and true.
+/// Otherwise, return false.
+static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+                                         std::vector<Constant*> &Mask) {
+  assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+         "Invalid CollectSingleShuffleElements");
+  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+  if (isa<UndefValue>(V)) {
+    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+    return true;
+  }
+
+  if (V == LHS) {
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+    return true;
+  }
+
+  if (V == RHS) {
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                      i+NumElts));
+    return true;
+  }
+
+  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert of an extract from some other vector, include it.
+    Value *VecOp    = IEI->getOperand(0);
+    Value *ScalarOp = IEI->getOperand(1);
+    Value *IdxOp    = IEI->getOperand(2);
+
+    if (!isa<ConstantInt>(IdxOp))
+      return false;
+    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+    if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
+      // Okay, we can handle this if the vector we are inserting into is
+      // transitively ok.
+      if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+        // If so, update the mask to reflect the inserted undef.
+        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
+        return true;
+      }
+    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+      if (isa<ConstantInt>(EI->getOperand(1)) &&
+          EI->getOperand(0)->getType() == V->getType()) {
+        unsigned ExtractedIdx =
+          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+
+        // This must be extracting from either LHS or RHS.
+        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
+          // Okay, we can handle this if the vector we are inserting into is
+          // transitively ok.
+          if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+            // If so, update the mask to reflect the inserted value.
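+            // Mask entries [0, NumElts) select from LHS; entries
+            // [NumElts, 2*NumElts) select from RHS.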
+            if (EI->getOperand(0) == LHS) {
+              Mask[InsertedIdx % NumElts] =
+                ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                 ExtractedIdx);
+            } else {
+              assert(EI->getOperand(0) == RHS);
+              Mask[InsertedIdx % NumElts] =
+                ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                 ExtractedIdx+NumElts);
+            }
+            return true;
+          }
+        }
+      }
+    }
+  }
+  // TODO: Handle shufflevector here!
+
+  return false;
+}
+
+/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
+/// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
+/// that computes V and the LHS value of the shuffle.
+static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+                                     Value *&RHS) {
+  assert(isa<VectorType>(V->getType()) &&
+         (RHS == 0 || V->getType() == RHS->getType()) &&
+         "Invalid shuffle!");
+  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+
+  if (isa<UndefValue>(V)) {
+    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
+    return V;
+  } else if (isa<ConstantAggregateZero>(V)) {
+    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
+    return V;
+  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert of an extract from some other vector, include it.
+    Value *VecOp    = IEI->getOperand(0);
+    Value *ScalarOp = IEI->getOperand(1);
+    Value *IdxOp    = IEI->getOperand(2);
+
+    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
+      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
+          EI->getOperand(0)->getType() == V->getType()) {
+        unsigned ExtractedIdx =
+          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
+
+        // Either the extracted-from or inserted-into vector must be RHS,
+        // otherwise we'd end up with a shuffle of three inputs.
+        if (EI->getOperand(0) == RHS || RHS == 0) {
+          RHS = EI->getOperand(0);
+          Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+          Mask[InsertedIdx % NumElts] =
+            ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                             NumElts+ExtractedIdx);
+          return V;
+        }
+
+        if (VecOp == RHS) {
+          Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+          // Everything but the extracted element is replaced with the RHS.
+          for (unsigned i = 0; i != NumElts; ++i) {
+            if (i != InsertedIdx)
+              Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
+                                         NumElts+i);
+          }
+          return V;
+        }
+
+        // If this insertelement is a chain that comes from exactly these two
+        // vectors, return the vector and the effective shuffle.
+        if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
+          return EI->getOperand(0);
+      }
+    }
+  }
+  // TODO: Handle shufflevector here!
+
+  // Otherwise, can't do anything fancy.  Return an identity vector.
+  for (unsigned i = 0; i != NumElts; ++i)
+    Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
+  return V;
+}
+
+Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
+  Value *VecOp    = IE.getOperand(0);
+  Value *ScalarOp = IE.getOperand(1);
+  Value *IdxOp    = IE.getOperand(2);
+
+  // If we are inserting an undef value, or inserting into an undef position,
+  // the result is just the original vector, so remove this instruction.
+  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+    return ReplaceInstUsesWith(IE, VecOp);
+
+  // If the inserted element was extracted from some other vector, and if the
+  // indexes are constant, try to turn this into a shufflevector operation.
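+  // e.g. (insertelement %A, (extractelement %B, i32 1), i32 0) can become a
+  // shuffle of %A and %B whose mask takes element 1 of %B followed by the
+  // remaining elements of %A.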
+ if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { + if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && + EI->getOperand(0)->getType() == IE.getType()) { + unsigned NumVectorElts = IE.getType()->getNumElements(); + unsigned ExtractedIdx = + cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); + unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); + + if (ExtractedIdx >= NumVectorElts) // Out of range extract. + return ReplaceInstUsesWith(IE, VecOp); + + if (InsertedIdx >= NumVectorElts) // Out of range insert. + return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); + + // If we are extracting a value from a vector, then inserting it right + // back into the same place, just use the input vector. + if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) + return ReplaceInstUsesWith(IE, VecOp); + + // If this insertelement isn't used by some other insertelement, turn it + // (and any insertelements it points to), into one big shuffle. + if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { + std::vector<Constant*> Mask; + Value *RHS = 0; + Value *LHS = CollectShuffleElements(&IE, Mask, RHS); + if (RHS == 0) RHS = UndefValue::get(LHS->getType()); + // We now have a shuffle of LHS, RHS, Mask. + return new ShuffleVectorInst(LHS, RHS, + ConstantVector::get(Mask)); + } + } + } + + unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) + return &IE; + + return 0; +} + + +Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { + Value *LHS = SVI.getOperand(0); + Value *RHS = SVI.getOperand(1); + std::vector<unsigned> Mask = getShuffleMask(&SVI); + + bool MadeChange = false; + + // Undefined shuffle mask -> undefined value. + if (isa<UndefValue>(SVI.getOperand(2))) + return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); + + unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); + + if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) + return 0; + + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { + LHS = SVI.getOperand(0); + RHS = SVI.getOperand(1); + MadeChange = true; + } + + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') + // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). + if (LHS == RHS || isa<UndefValue>(LHS)) { + if (isa<UndefValue>(LHS) && LHS == RHS) { + // shuffle(undef,undef,mask) -> undef. + return ReplaceInstUsesWith(SVI, LHS); + } + + // Remap any references to RHS to use LHS. + std::vector<Constant*> Elts; + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] >= 2*e) + Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); + else { + if ((Mask[i] >= e && isa<UndefValue>(RHS)) || + (Mask[i] < e && isa<UndefValue>(LHS))) { + Mask[i] = 2*e; // Turn into undef. + Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); + } else { + Mask[i] = Mask[i] % e; // Force to LHS. 
+          Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+                                          Mask[i]));
+        }
+      }
+    }
+    SVI.setOperand(0, SVI.getOperand(1));
+    SVI.setOperand(1, UndefValue::get(RHS->getType()));
+    SVI.setOperand(2, ConstantVector::get(Elts));
+    LHS = SVI.getOperand(0);
+    RHS = SVI.getOperand(1);
+    MadeChange = true;
+  }
+
+  // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
+  bool isLHSID = true, isRHSID = true;
+
+  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] >= e*2) continue;  // Ignore undef values.
+    // Is this an identity shuffle of the LHS value?
+    isLHSID &= (Mask[i] == i);
+
+    // Is this an identity shuffle of the RHS value?
+    isRHSID &= (Mask[i]-e == i);
+  }
+
+  // Eliminate identity shuffles.
+  if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+  if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+
+  // If the LHS is a shufflevector itself, see if we can combine it with this
+  // one without producing an unusual shuffle.  Here we are really conservative:
+  // we are absolutely afraid of producing a shuffle mask not in the input
+  // program, because the code gen may not be smart enough to turn a merged
+  // shuffle into two specific shuffles: it may produce worse code.  As such,
+  // we only merge two shuffles if the result is one of the two input shuffle
+  // masks.  In this case, merging the shuffles just removes one instruction,
+  // which we know is safe.  This is good for things like turning:
+  //   (splat(splat)) -> splat.
+  if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+    if (isa<UndefValue>(RHS)) {
+      std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
+
+      if (LHSMask.size() == Mask.size()) {
+        std::vector<unsigned> NewMask;
+        for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+          if (Mask[i] >= e)
+            NewMask.push_back(2*e);
+          else
+            NewMask.push_back(LHSMask[Mask[i]]);
+
+        // If the result mask is equal to the src shuffle or this
+        // shuffle mask, do the replacement.
+        if (NewMask == LHSMask || NewMask == Mask) {
+          unsigned LHSInNElts =
+            cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+            getNumElements();
+          std::vector<Constant*> Elts;
+          for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+            if (NewMask[i] >= LHSInNElts*2) {
+              Elts.push_back(UndefValue::get(
+                  Type::getInt32Ty(SVI.getContext())));
+            } else {
+              Elts.push_back(ConstantInt::get(
+                  Type::getInt32Ty(SVI.getContext()),
+                  NewMask[i]));
+            }
+          }
+          return new ShuffleVectorInst(LHSSVI->getOperand(0),
+                                       LHSSVI->getOperand(1),
+                                       ConstantVector::get(Elts));
+        }
+      }
+    }
+  }
+
+  return MadeChange ? &SVI : 0;
+}
+
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
new file mode 100644
index 0000000..9d88621
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -0,0 +1,105 @@
+//===- InstCombineWorklist.h - Worklist for the InstCombine pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTCOMBINE_WORKLIST_H
+#define INSTCOMBINE_WORKLIST_H
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Instruction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// InstCombineWorklist - This is the worklist management logic for
+/// InstCombine.
+class VISIBILITY_HIDDEN InstCombineWorklist {
+  SmallVector<Instruction*, 256> Worklist;
+  DenseMap<Instruction*, unsigned> WorklistMap;
+
+  void operator=(const InstCombineWorklist&RHS);   // DO NOT IMPLEMENT
+  InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT
+public:
+  InstCombineWorklist() {}
+
+  bool isEmpty() const { return Worklist.empty(); }
+
+  /// Add - Add the specified instruction to the worklist if it isn't already
+  /// in it.
+  void Add(Instruction *I) {
+    if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
+      DEBUG(errs() << "IC: ADD: " << *I << '\n');
+      Worklist.push_back(I);
+    }
+  }
+
+  void AddValue(Value *V) {
+    if (Instruction *I = dyn_cast<Instruction>(V))
+      Add(I);
+  }
+
+  /// AddInitialGroup - Add the specified batch of stuff in reverse order,
+  /// which should only be done when the worklist is empty and when the group
+  /// has no duplicates.
+  void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
+    assert(Worklist.empty() && "Worklist must be empty to add initial group");
+    Worklist.reserve(NumEntries+16);
+    DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+    for (; NumEntries; --NumEntries) {
+      Instruction *I = List[NumEntries-1];
+      WorklistMap.insert(std::make_pair(I, Worklist.size()));
+      Worklist.push_back(I);
+    }
+  }
+
+  // Remove - remove I from the worklist if it exists.
+  void Remove(Instruction *I) {
+    DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+    if (It == WorklistMap.end()) return; // Not in worklist.
+
+    // Don't bother moving everything down, just null out the slot.
+    Worklist[It->second] = 0;
+
+    WorklistMap.erase(It);
+  }
+
+  Instruction *RemoveOne() {
+    Instruction *I = Worklist.back();
+    Worklist.pop_back();
+    WorklistMap.erase(I);
+    return I;
+  }
+
+  /// AddUsersToWorkList - When an instruction is simplified, add all users of
+  /// the instruction to the work lists because they might get more simplified
+  /// now.
+  ///
+  void AddUsersToWorkList(Instruction &I) {
+    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+         UI != UE; ++UI)
+      Add(cast<Instruction>(*UI));
+  }
+
+
+  /// Zap - Check that the worklist is empty and nuke the backing store for
+  /// the map if it is large.
+  void Zap() {
+    assert(WorklistMap.empty() && "Worklist empty, but map not?");
+
+    // Do an explicit clear; this shrinks the map if needed.
+    WorklistMap.clear();
+  }
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
new file mode 100644
index 0000000..93b1961
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -0,0 +1,1274 @@
+//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// InstructionCombining - Combine instructions to form fewer, simple +// instructions. This pass does not modify the CFG. This pass is where +// algebraic simplification happens. +// +// This pass combines things like: +// %Y = add i32 %X, 1 +// %Z = add i32 %Y, 1 +// into: +// %Z = add i32 %X, 2 +// +// This is a simple worklist driven algorithm. +// +// This pass guarantees that the following canonicalizations are performed on +// the program: +// 1. If a binary operator has a constant operand, it is moved to the RHS +// 2. Bitwise operators with constant operands are always grouped so that +// shifts are performed first, then or's, then and's, then xor's. +// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible +// 4. All cmp instructions on boolean values are replaced with logical ops +// 5. add X, X is represented as (X*2) => (X << 1) +// 6. Multiplies with a power-of-two constant argument are transformed into +// shifts. +// ... etc. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instcombine" +#include "llvm/Transforms/Scalar.h" +#include "InstCombine.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include <algorithm> +#include <climits> +using namespace llvm; +using namespace llvm::PatternMatch; + +STATISTIC(NumCombined , "Number of insts combined"); +STATISTIC(NumConstProp, "Number of constant folds"); +STATISTIC(NumDeadInst , "Number of dead inst eliminated"); +STATISTIC(NumSunkInst , "Number of instructions sunk"); + + +char InstCombiner::ID = 0; +static RegisterPass<InstCombiner> +X("instcombine", "Combine redundant instructions"); + +void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreservedID(LCSSAID); + AU.setPreservesCFG(); +} + + +/// ShouldChangeType - Return true if it is desirable to convert a computation +/// from 'From' to 'To'. We don't want to convert from a legal to an illegal +/// type for example, or from a smaller to a larger illegal type. +bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { + assert(isa<IntegerType>(From) && isa<IntegerType>(To)); + + // If we don't have TD, we don't know if the source/dest are legal. + if (!TD) return false; + + unsigned FromWidth = From->getPrimitiveSizeInBits(); + unsigned ToWidth = To->getPrimitiveSizeInBits(); + bool FromLegal = TD->isLegalInteger(FromWidth); + bool ToLegal = TD->isLegalInteger(ToWidth); + + // If this is a legal integer from type, and the result would be an illegal + // type, don't do the transformation. + if (FromLegal && !ToLegal) + return false; + + // Otherwise, if both are illegal, do not increase the size of the result. We + // do allow things like i160 -> i64, but not i64 -> i160. + if (!FromLegal && !ToLegal && ToWidth > FromWidth) + return false; + + return true; +} + + +// SimplifyCommutative - This performs a few simplifications for commutative +// operators: +// +// 1. 
Order operands such that they are listed from right (least complex) to +// left (most complex). This puts constants before unary operators before +// binary operators. +// +// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) +// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) +// +bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { + bool Changed = false; + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) + Changed = !I.swapOperands(); + + if (!I.isAssociative()) return Changed; + + Instruction::BinaryOps Opcode = I.getOpcode(); + if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) + if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { + if (isa<Constant>(I.getOperand(1))) { + Constant *Folded = ConstantExpr::get(I.getOpcode(), + cast<Constant>(I.getOperand(1)), + cast<Constant>(Op->getOperand(1))); + I.setOperand(0, Op->getOperand(0)); + I.setOperand(1, Folded); + return true; + } + + if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1))) + if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && + Op->hasOneUse() && Op1->hasOneUse()) { + Constant *C1 = cast<Constant>(Op->getOperand(1)); + Constant *C2 = cast<Constant>(Op1->getOperand(1)); + + // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) + Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); + Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), + Op1->getOperand(0), + Op1->getName(), &I); + Worklist.Add(New); + I.setOperand(0, New); + I.setOperand(1, Folded); + return true; + } + } + return Changed; +} + +// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction +// if the LHS is a constant zero (which is the 'negate' form). +// +Value *InstCombiner::dyn_castNegVal(Value *V) const { + if (BinaryOperator::isNeg(V)) + return BinaryOperator::getNegArgument(V); + + // Constants can be considered to be negated values if they can be folded. + if (ConstantInt *C = dyn_cast<ConstantInt>(V)) + return ConstantExpr::getNeg(C); + + if (ConstantVector *C = dyn_cast<ConstantVector>(V)) + if (C->getType()->getElementType()->isInteger()) + return ConstantExpr::getNeg(C); + + return 0; +} + +// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the +// instruction if the LHS is a constant negative zero (which is the 'negate' +// form). +// +Value *InstCombiner::dyn_castFNegVal(Value *V) const { + if (BinaryOperator::isFNeg(V)) + return BinaryOperator::getFNegArgument(V); + + // Constants can be considered to be negated values if they can be folded. + if (ConstantFP *C = dyn_cast<ConstantFP>(V)) + return ConstantExpr::getFNeg(C); + + if (ConstantVector *C = dyn_cast<ConstantVector>(V)) + if (C->getType()->getElementType()->isFloatingPoint()) + return ConstantExpr::getFNeg(C); + + return 0; +} + +static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, + InstCombiner *IC) { + if (CastInst *CI = dyn_cast<CastInst>(&I)) + return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); + + // Figure out if the constant is the left or the right argument. 
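+  // e.g. for I = 'add (select %c, i32 1, i32 2), i32 8' with SO = i32 1,
+  // the constant 8 is the RHS and this arm folds to the constant 9.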
+  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
+  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
+
+  if (Constant *SOC = dyn_cast<Constant>(SO)) {
+    if (ConstIsRHS)
+      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
+    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
+  }
+
+  Value *Op0 = SO, *Op1 = ConstOperand;
+  if (!ConstIsRHS)
+    std::swap(Op0, Op1);
+
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
+    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+                                    SO->getName()+".op");
+  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
+    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
+                                   SO->getName()+".cmp");
+  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
+    return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
+                                   SO->getName()+".cmp");
+  llvm_unreachable("Unknown binary instruction type!");
+}
+
+// FoldOpIntoSelect - Given an instruction with a select as one operand and a
+// constant as the other operand, try to fold the binary operator into the
+// select arguments.  This also works for Cast instructions, which obviously do
+// not have a second operand.
+Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
+  // Don't modify shared select instructions.
+  if (!SI->hasOneUse()) return 0;
+  Value *TV = SI->getOperand(1);
+  Value *FV = SI->getOperand(2);
+
+  if (isa<Constant>(TV) || isa<Constant>(FV)) {
+    // Bool selects with constant operands can be folded to logical ops.
+    if (SI->getType()->isInteger(1)) return 0;
+
+    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
+    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+
+    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
+                              SelectFalseVal);
+  }
+  return 0;
+}
+
+
+/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
+/// has a PHI node as operand #0, see if we can fold the instruction into the
+/// PHI (which is only possible if all operands to the PHI are constants).
+///
+/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
+/// that would normally be unprofitable because they strongly encourage jump
+/// threading.
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
+                                         bool AllowAggressive) {
+  AllowAggressive = false;
+  PHINode *PN = cast<PHINode>(I.getOperand(0));
+  unsigned NumPHIValues = PN->getNumIncomingValues();
+  if (NumPHIValues == 0 ||
+      // We normally only transform phis with a single use, unless we're trying
+      // hard to make jump threading happen.
+      (!PN->hasOneUse() && !AllowAggressive))
+    return 0;
+
+
+  // Check to see if all of the operands of the PHI are simple constants
+  // (constantint/constantfp/undef).  If there is one non-constant value,
+  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
+  // bail out.  We don't do arbitrary constant expressions here because moving
+  // their computation can be expensive without a cost model.
+  BasicBlock *NonConstBB = 0;
+  for (unsigned i = 0; i != NumPHIValues; ++i)
+    if (!isa<Constant>(PN->getIncomingValue(i)) ||
+        isa<ConstantExpr>(PN->getIncomingValue(i))) {
+      if (NonConstBB) return 0;  // More than one non-const value.
+      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
+      NonConstBB = PN->getIncomingBlock(i);
+
+      // If the incoming non-constant value is in I's block, we have an
+      // infinite loop.
+ if (NonConstBB == I.getParent()) + return 0; + } + + // If there is exactly one non-constant value, we can insert a copy of the + // operation in that block. However, if this is a critical edge, we would be + // inserting the computation one some other paths (e.g. inside a loop). Only + // do this if the pred block is unconditionally branching into the phi block. + if (NonConstBB != 0 && !AllowAggressive) { + BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); + if (!BI || !BI->isUnconditional()) return 0; + } + + // Okay, we can do the transformation: create the new PHI node. + PHINode *NewPN = PHINode::Create(I.getType(), ""); + NewPN->reserveOperandSpace(PN->getNumOperands()/2); + InsertNewInstBefore(NewPN, *PN); + NewPN->takeName(PN); + + // Next, add all of the operands to the PHI. + if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { + // We only currently try to fold the condition of a select when it is a phi, + // not the true/false values. + Value *TrueV = SI->getTrueValue(); + Value *FalseV = SI->getFalseValue(); + BasicBlock *PhiTransBB = PN->getParent(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + BasicBlock *ThisBB = PN->getIncomingBlock(i); + Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); + Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, + "phitmp", NonConstBB->getTerminator()); + Worklist.Add(cast<Instruction>(InV)); + } + NewPN->addIncoming(InV, ThisBB); + } + } else if (I.getNumOperands() == 2) { + Constant *C = cast<Constant>(I.getOperand(1)); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV = 0; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + if (CmpInst *CI = dyn_cast<CmpInst>(&I)) + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); + else + InV = ConstantExpr::get(I.getOpcode(), InC, C); + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) + InV = BinaryOperator::Create(BO->getOpcode(), + PN->getIncomingValue(i), C, "phitmp", + NonConstBB->getTerminator()); + else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) + InV = CmpInst::Create(CI->getOpcode(), + CI->getPredicate(), + PN->getIncomingValue(i), C, "phitmp", + NonConstBB->getTerminator()); + else + llvm_unreachable("Unknown binop!"); + + Worklist.Add(cast<Instruction>(InV)); + } + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } + } else { + CastInst *CI = cast<CastInst>(&I); + const Type *RetTy = CI->getType(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + Value *InV; + if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { + InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), + I.getType(), "phitmp", + NonConstBB->getTerminator()); + Worklist.Add(cast<Instruction>(InV)); + } + NewPN->addIncoming(InV, PN->getIncomingBlock(i)); + } + } + return ReplaceInstUsesWith(I, NewPN); +} + +/// FindElementAtOffset - Given a type and a constant offset, determine whether +/// or not there is a sequence of GEP indices into the type that will land us at +/// the specified offset. 
the specified offset. If so, fill them into NewIndices and return the
+/// resultant element type, otherwise return null.
+const Type *InstCombiner::FindElementAtOffset(const Type *Ty, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ if (!TD) return 0;
+ if (!Ty->isSized()) return 0;
+
+ // Start with the index over the outer type. Note that the type size
+ // might be zero (even if the offset isn't zero) if the indexed type
+ // is something like [0 x {int, int}]
+ const Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ int64_t FirstIdx = 0;
+ if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ FirstIdx = Offset/TySize;
+ Offset -= FirstIdx*TySize;
+
+ // Handle hosts where % returns negative instead of values [0..TySize).
+ if (Offset < 0) {
+ --FirstIdx;
+ Offset += TySize;
+ assert(Offset >= 0);
+ }
+ assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+ }
+
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+ // Index into the types. If we fail, return null.
+ while (Offset) {
+ // Indexing into tail padding between struct/array elements.
+ if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ return 0;
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ assert(Offset < (int64_t)SL->getSizeInBytes() &&
+ "Offset must stay within the indexed type");
+
+ unsigned Elt = SL->getElementContainingOffset(Offset);
+ NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ Elt));
+
+ Offset -= SL->getElementOffset(Elt);
+ Ty = STy->getElementType(Elt);
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ assert(EltSize && "Cannot index into a zero-sized array");
+ NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
+ Offset %= EltSize;
+ Ty = AT->getElementType();
+ } else {
+ // Otherwise, we can't index into the middle of this atomic type, bail.
+ return 0;
+ }
+ }
+
+ return Ty;
+}
+
+
+
+Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
+
+ if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
+ return ReplaceInstUsesWith(GEP, V);
+
+ Value *PtrOp = GEP.getOperand(0);
+
+ if (isa<UndefValue>(GEP.getOperand(0)))
+ return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
+
+ // Eliminate unneeded casts for indices.
+ if (TD) {
+ bool MadeChange = false;
+ unsigned PtrSize = TD->getPointerSizeInBits();
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
+ I != E; ++I, ++GTI) {
+ if (!isa<SequentialType>(*GTI)) continue;
+
+ // If we are using a wider index than needed for this platform, shrink it
+ // to what we need. If narrower, sign-extend it to what we need. This
+ // explicit cast can make subsequent optimizations more obvious.
+ unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
+ if (OpBits == PtrSize)
+ continue;
+
+ *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()), true);
+ MadeChange = true;
+ }
+ if (MadeChange) return &GEP;
+ }
+
+ // Combine Indices - If the source pointer to this getelementptr instruction
+ // is a getelementptr instruction, combine the indices of the two
+ // getelementptr instructions into a single instruction.
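+ // A minimal sketch of the combine, with hypothetical IR:
+ // %t = getelementptr i32* %P, i64 %a
+ // %u = getelementptr i32* %t, i64 %b
+ // becomes, when the trailing index of the source GEP is sequential:
+ // %sum = add i64 %a, %b
+ // %u = getelementptr i32* %P, i64 %sum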
+ //
+ if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+ // Note that if our source is a gep chain itself, we wait for that
+ // chain to be resolved before we perform this transformation. This
+ // avoids us creating a TON of code in some cases.
+ //
+ if (GetElementPtrInst *SrcGEP =
+ dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
+ if (SrcGEP->getNumOperands() == 2)
+ return 0; // Wait until our source is folded to completion.
+
+ SmallVector<Value*, 8> Indices;
+
+ // Find out whether the last index in the source GEP is a sequential idx.
+ bool EndsWithSequential = false;
+ for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+ I != E; ++I)
+ EndsWithSequential = !isa<StructType>(*I);
+
+ // Can we combine the two pointer arithmetic offsets?
+ if (EndsWithSequential) {
+ // Replace: gep (gep %P, long B), long A, ...
+ // With: T = long A+B; gep %P, T, ...
+ //
+ Value *Sum;
+ Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+ Value *GO1 = GEP.getOperand(1);
+ if (SO1 == Constant::getNullValue(SO1->getType())) {
+ Sum = GO1;
+ } else if (GO1 == Constant::getNullValue(GO1->getType())) {
+ Sum = SO1;
+ } else {
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return 0;
+ Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
+ }
+
+ // Update the GEP in place if possible.
+ if (Src->getNumOperands() == 2) {
+ GEP.setOperand(0, Src->getOperand(0));
+ GEP.setOperand(1, Sum);
+ return &GEP;
+ }
+ Indices.append(Src->op_begin()+1, Src->op_end()-1);
+ Indices.push_back(Sum);
+ Indices.append(GEP.op_begin()+2, GEP.op_end());
+ } else if (isa<Constant>(*GEP.idx_begin()) &&
+ cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+ Src->getNumOperands() != 1) {
+ // Otherwise we can do the fold if the first index of the GEP is a zero.
+ Indices.append(Src->op_begin()+1, Src->op_end());
+ Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+ }
+
+ if (!Indices.empty())
+ return (GEP.isInBounds() && Src->isInBounds()) ?
+ GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName()) :
+ GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
+ Indices.end(), GEP.getName());
+ }
+
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ if (StrippedPtr != PtrOp) {
+ const PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
+
+ bool HasZeroPointerIndex = false;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+ HasZeroPointerIndex = C->isZero();
+
+ // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+ // into : GEP [10 x i8]* X, i32 0, ...
+ //
+ // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
+ // into : GEP i8* X, ...
+ //
+ // This occurs when the program declares an array extern like "int X[];"
+ if (HasZeroPointerIndex) {
+ const PointerType *CPTy = cast<PointerType>(PtrOp->getType());
+ if (const ArrayType *CATy =
+ dyn_cast<ArrayType>(CPTy->getElementType())) {
+ // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ // -> GEP i8* X, ...
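+ // A minimal sketch of why this is safe, with a hypothetical array:
+ // indexing a [0 x i8] at element 0 adds no offset, and its element
+ // type matches the stripped pointer's pointee (i8), so the leading
+ // zero index can be dropped and the rest applied to X directly.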
+ SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
+ GetElementPtrInst *Res =
+ GetElementPtrInst::Create(StrippedPtr, Idx.begin(),
+ Idx.end(), GEP.getName());
+ Res->setIsInBounds(GEP.isInBounds());
+ return Res;
+ }
+
+ if (const ArrayType *XATy =
+ dyn_cast<ArrayType>(StrippedPtrTy->getElementType())) {
+ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
+ if (CATy->getElementType() == XATy->getElementType()) {
+ // -> GEP [10 x i8]* X, i32 0, ...
+ // At this point, we know that the cast source type is a pointer
+ // to an array of the same type as the destination pointer
+ // array. Because the array type is never stepped over (there
+ // is a leading zero) we can fold the cast into this GEP.
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ }
+ } else if (GEP.getNumOperands() == 2) {
+ // Transform things like:
+ // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+ // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+ const Type *SrcElTy = StrippedPtrTy->getElementType();
+ const Type *ResElTy = cast<PointerType>(PtrOp->getType())->getElementType();
+ if (TD && isa<ArrayType>(SrcElTy) &&
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(ResElTy)) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = GEP.getOperand(1);
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // V and GEP are both pointer types --> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+
+ // Transform things like:
+ // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
+ // (where tmp = 8*tmp2) into:
+ // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
+
+ if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) {
+ uint64_t ArrayEltSize =
+ TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+
+ // Check to see if "tmp" is scaled by a multiple of ArrayEltSize. We
+ // allow either a mul, shift, or constant here.
+ Value *NewIdx = 0;
+ ConstantInt *Scale = 0;
+ if (ArrayEltSize == 1) {
+ NewIdx = GEP.getOperand(1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
+ NewIdx = ConstantInt::get(CI->getType(), 1);
+ Scale = CI;
+ } else if (Instruction *Inst = dyn_cast<Instruction>(GEP.getOperand(1))) {
+ if (Inst->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
+ uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
+ NewIdx = Inst->getOperand(0);
+ } else if (Inst->getOpcode() == Instruction::Mul &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ Scale = cast<ConstantInt>(Inst->getOperand(1));
+ NewIdx = Inst->getOperand(0);
+ }
+ }
+
+ // If the index will be to exactly the right offset with the scale taken
+ // out, perform the transformation. Note, we don't know whether Scale is
+ // signed or not. We'll use an unsigned version of the division/modulo
+ // operation after making sure Scale doesn't have the sign bit set.
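+ // A minimal sketch with hypothetical numbers: for
+ // getelementptr i8* (bitcast [100 x double]* %X to i8*), i32 %tmp
+ // where %tmp = shl i32 %tmp2, 3, Scale is 8 and ArrayEltSize is 8,
+ // so 8 % 8 == 0 and the fold below produces
+ // getelementptr [100 x double]* %X, i32 0, i32 %tmp2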
+ if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
+ Scale->getZExtValue() % ArrayEltSize == 0) {
+ Scale = ConstantInt::get(Scale->getType(),
+ Scale->getZExtValue() / ArrayEltSize);
+ if (Scale->getZExtValue() != 1) {
+ Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
+ false /*ZExt*/);
+ NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
+ }
+
+ // Insert the new GEP instruction.
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
+ Idx[1] = NewIdx;
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, Idx + 2, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, Idx + 2, GEP.getName());
+ // The NewGEP must be pointer typed, so must the old one -> BitCast
+ return new BitCastInst(NewGEP, GEP.getType());
+ }
+ }
+ }
+ }
+
+ /// See if we can simplify:
+ /// X = bitcast A* to B*
+ /// Y = gep X, <...constant indices...>
+ /// into a gep of the original struct. This is important for SROA and alias
+ /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ if (TD &&
+ !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+ // Determine how much the GEP moves the pointer. We are guaranteed to get
+ // a constant back from EmitGEPOffset.
+ ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP));
+ int64_t Offset = OffsetV->getSExtValue();
+
+ // If this GEP instruction doesn't move the pointer, just replace the GEP
+ // with a bitcast of the real input to the dest type.
+ if (Offset == 0) {
+ // If the bitcast is of an allocation, and the allocation will be
+ // converted to match the type of the cast, don't touch this.
+ if (isa<AllocaInst>(BCI->getOperand(0)) ||
+ isMalloc(BCI->getOperand(0))) {
+ // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+ if (Instruction *I = visitBitCast(*BCI)) {
+ if (I != BCI) {
+ I->takeName(BCI);
+ BCI->getParent()->getInstList().insert(BCI, I);
+ ReplaceInstUsesWith(*BCI, I);
+ }
+ return &GEP;
+ }
+ }
+ return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ }
+
+ // Otherwise, if the offset is non-zero, we need to find out if there is a
+ // field at Offset in 'A's type. If so, we can pull the cast through the
+ // GEP.
+ SmallVector<Value*, 8> NewIndices;
+ const Type *InTy =
+ cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+ if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ Value *NGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end()) :
+ Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(),
+ NewIndices.end());
+
+ if (NGEP->getType() == GEP.getType())
+ return ReplaceInstUsesWith(GEP, NGEP);
+ NGEP->takeName(&GEP);
+ return new BitCastInst(NGEP, GEP.getType());
+ }
+ }
+ }
+
+ return 0;
+}
+
+Instruction *InstCombiner::visitFree(Instruction &FI) {
+ Value *Op = FI.getOperand(1);
+
+ // free undef -> unreachable.
+ if (isa<UndefValue>(Op)) {
+ // Insert a new store through an undef pointer because we cannot modify
+ // the CFG here.
+ new StoreInst(ConstantInt::getTrue(FI.getContext()),
+ UndefValue::get(Type::getInt1PtrTy(FI.getContext())), &FI);
+ return EraseInstFromFunction(FI);
+ }
+
+ // If we have 'free null', delete the instruction. This can happen in STL
+ // code when lots of inlining happens.
+ if (isa<ConstantPointerNull>(Op))
+ return EraseInstFromFunction(FI);
+
+ // If we have a malloc call whose only use is a free call, delete both.
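+ // A minimal sketch, with hypothetical IR:
+ // %m = call i8* @malloc(i32 4)
+ // call void @free(i8* %m)
+ // Neither value has any other use, so both calls can be erased; the
+ // bitcast case below handles a free that sees the malloc through a cast.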
+ if (isMalloc(Op)) {
+ if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
+ if (Op->hasOneUse() && CI->hasOneUse()) {
+ EraseInstFromFunction(FI);
+ EraseInstFromFunction(*CI);
+ return EraseInstFromFunction(*cast<Instruction>(Op));
+ }
+ } else {
+ // Op is a call to malloc
+ if (Op->hasOneUse()) {
+ EraseInstFromFunction(FI);
+ return EraseInstFromFunction(*cast<Instruction>(Op));
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+
+Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
+ // Change br (not X), label True, label False to: br X, label False, True
+ Value *X = 0;
+ BasicBlock *TrueDest;
+ BasicBlock *FalseDest;
+ if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
+ !isa<Constant>(X)) {
+ // Swap Destinations and condition...
+ BI.setCondition(X);
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ return &BI;
+ }
+
+ // Canonicalize fcmp_one -> fcmp_oeq
+ FCmpInst::Predicate FPred; Value *Y;
+ if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
+ FPred == FCmpInst::FCMP_OGE) {
+ FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
+ Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
+
+ // Swap Destinations and condition.
+ BI.setSuccessor(0, FalseDest);
+ BI.setSuccessor(1, TrueDest);
+ Worklist.Add(Cond);
+ return &BI;
+ }
+
+ // Canonicalize icmp_ne -> icmp_eq
+ ICmpInst::Predicate IPred;
+ if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
+ TrueDest, FalseDest)) &&
+ BI.getCondition()->hasOneUse())
+ if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE ||
+ IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
+ IPred == ICmpInst::ICMP_SGE) {
+ ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
+ Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
+ // Swap Destinations and condition.
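+ // A minimal sketch, with hypothetical IR:
+ // br (icmp ne i32 %x, %y), label %T, label %F
+ // canonicalizes to:
+ // br (icmp eq i32 %x, %y), label %F, label %T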
+ BI.setSuccessor(0, FalseDest); + BI.setSuccessor(1, TrueDest); + Worklist.Add(Cond); + return &BI; + } + + return 0; +} + +Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { + Value *Cond = SI.getCondition(); + if (Instruction *I = dyn_cast<Instruction>(Cond)) { + if (I->getOpcode() == Instruction::Add) + if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { + // change 'switch (X+4) case 1:' into 'switch (X) case -3' + for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) + SI.setOperand(i, + ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)), + AddRHS)); + SI.setOperand(0, I->getOperand(0)); + Worklist.Add(I); + return &SI; + } + } + return 0; +} + +Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { + Value *Agg = EV.getAggregateOperand(); + + if (!EV.hasIndices()) + return ReplaceInstUsesWith(EV, Agg); + + if (Constant *C = dyn_cast<Constant>(Agg)) { + if (isa<UndefValue>(C)) + return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); + + if (isa<ConstantAggregateZero>(C)) + return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); + + if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { + // Extract the element indexed by the first index out of the constant + Value *V = C->getOperand(*EV.idx_begin()); + if (EV.getNumIndices() > 1) + // Extract the remaining indices out of the constant indexed by the + // first index + return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end()); + else + return ReplaceInstUsesWith(EV, V); + } + return 0; // Can't handle other constants + } + if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { + // We're extracting from an insertvalue instruction, compare the indices + const unsigned *exti, *exte, *insi, *inse; + for (exti = EV.idx_begin(), insi = IV->idx_begin(), + exte = EV.idx_end(), inse = IV->idx_end(); + exti != exte && insi != inse; + ++exti, ++insi) { + if (*insi != *exti) + // The insert and extract both reference distinctly different elements. + // This means the extract is not influenced by the insert, and we can + // replace the aggregate operand of the extract with the aggregate + // operand of the insert. i.e., replace + // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 + // %E = extractvalue { i32, { i32 } } %I, 0 + // with + // %E = extractvalue { i32, { i32 } } %A, 0 + return ExtractValueInst::Create(IV->getAggregateOperand(), + EV.idx_begin(), EV.idx_end()); + } + if (exti == exte && insi == inse) + // Both iterators are at the end: Index lists are identical. Replace + // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 + // %C = extractvalue { i32, { i32 } } %B, 1, 0 + // with "i32 42" + return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand()); + if (exti == exte) { + // The extract list is a prefix of the insert list. i.e. replace + // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 + // %E = extractvalue { i32, { i32 } } %I, 1 + // with + // %X = extractvalue { i32, { i32 } } %A, 1 + // %E = insertvalue { i32 } %X, i32 42, 0 + // by switching the order of the insert and extract (though the + // insertvalue should be left in, since it may have other uses). 
+ Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
+ EV.idx_begin(), EV.idx_end());
+ return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
+ insi, inse);
+ }
+ if (insi == inse)
+ // The insert list is a prefix of the extract list.
+ // We can simply remove the common indices from the extract and make it
+ // operate on the inserted value instead of the insertvalue result.
+ // i.e., replace
+ // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
+ // %E = extractvalue { i32, { i32 } } %I, 1, 0
+ // with
+ // %E = extractvalue { i32 } { i32 42 }, 0
+ return ExtractValueInst::Create(IV->getInsertedValueOperand(),
+ exti, exte);
+ }
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic, see if we're the only user, which
+ // allows us to simplify multiple result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ // Can't simplify extracts from other values. Note that nested extracts are
+ // already simplified implicitly by the above (extract (extract (insert))
+ // will be translated into extract (insert (extract)) first and then just
+ // the value inserted, if appropriate).
+ return 0;
+}
+
+
+
+
+/// TryToSinkInstruction - Try to move the specified instruction from its
+/// current block into the beginning of DestBlock, which can only happen if it's
+/// safe to move the instruction past all of the instructions between it and the
+/// end of its block.
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ assert(I->hasOneUse() && "Invariants didn't hold!");
+
+ // Cannot move control-flow-involving instructions, volatile loads, vaarg,
+ // etc.
+ if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I))
+ return false;
+
+ // Do not sink alloca instructions out of the entry block.
+ if (isa<AllocaInst>(I) && I->getParent() ==
+ &DestBlock->getParent()->getEntryBlock())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
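+ // A minimal sketch of the unsafe case, with hypothetical IR:
+ // %v = load i32* %p
+ // store i32 0, i32* %p ; may clobber the memory %v reads
+ // br label %succ
+ // Sinking %v into %succ would observe the stored value instead.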
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ Scan != E; ++Scan)
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
+
+ I->moveBefore(InsertPos);
+ ++NumSunkInst;
+ return true;
+}
+
+
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &Visited,
+ InstCombiner &IC,
+ const TargetData *TD) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
+
+ std::vector<Instruction*> InstrsForInstCombineWorklist;
+ InstrsForInstCombineWorklist.reserve(128);
+
+ SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB)) continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst)) {
+ ++NumDeadInst;
+ DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ if (TD) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+ i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == 0) continue;
+
+ // If we already folded this constant, don't try again.
+ if (!FoldedConstants.insert(CE))
+ continue;
+
+ Constant *NewC = ConstantFoldConstantExpression(CE, TD);
+ if (NewC && NewC != CE) {
+ *i = NewC;
+ MadeIRChange = true;
+ }
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if an explicit case matches; otherwise only the default
+ // destination (successor #0) is reachable.
+ BasicBlock *ReachableBB = SI->getSuccessor(0);
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
+ if (SI->getCaseValue(i) == Cond) {
+ ReachableBB = SI->getSuccessor(i);
+ break;
+ }
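+ // A minimal sketch, with hypothetical IR: for
+ // switch i32 7, label %default [ i32 7, label %case7 ]
+ // only %case7 is pushed; if no case value matches the constant,
+ // only %default is.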
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jibes well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
+}
+
+bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
+ MadeIRChange = false;
+
+ DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getNameStr() << "\n");
+
+ {
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock*, 64> Visited;
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (!Visited.count(BB)) {
+ Instruction *Term = BB->getTerminator();
+ while (Term != BB->begin()) { // Remove instrs bottom-up
+ BasicBlock::iterator I = Term; --I;
+
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ // A debug intrinsic shouldn't force another iteration if we weren't
+ // going to do one without it.
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+
+ // If I is not of void type, replaceAllUsesWith undef.
+ // This allows ValueHandlers and custom metadata to adjust themselves.
+ if (!I->getType()->isVoidTy())
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.RemoveOne();
+ if (I == 0) continue; // skip null values.
+
+ // Check to see if we can DCE the instruction.
+ if (isInstructionTriviallyDead(I)) {
+ DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ EraseInstFromFunction(*I);
+ ++NumDeadInst;
+ MadeIRChange = true;
+ continue;
+ }
+
+ // Instruction isn't dead, see if we can constant propagate it.
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+
+ // See if we can trivially sink this instruction to a successor basic block.
+ if (I->hasOneUse()) {
+ BasicBlock *BB = I->getParent();
+ Instruction *UserInst = cast<Instruction>(I->use_back());
+ BasicBlock *UserParent;
+
+ // Get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ else
+ UserParent = UserInst->getParent();
+
+ if (UserParent != BB) {
+ bool UserIsSuccessor = false;
+ // See if the user is one of our successors.
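+ // A minimal sketch, with a hypothetical CFG: if %v is defined in %bb
+ // and its single use is in successor %s, and %bb is the only
+ // predecessor of %s, %v can sink into %s; with another predecessor
+ // the edge would have to be split, so we leave %v where it is.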
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ if (*SI == UserParent) {
+ UserIsSuccessor = true;
+ break;
+ }
+
+ // If the user is one of our immediate successors, and if that successor
+ // only has us as a predecessor (we'd have to split the critical edge
+ // otherwise), we can keep going.
+ if (UserIsSuccessor && UserParent->getSinglePredecessor())
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ MadeIRChange |= TryToSinkInstruction(I, UserParent);
+ }
+ }
+
+ // Now that we have an instruction, try combining it to simplify it.
+ Builder->SetInsertPoint(I->getParent(), I);
+
+#ifndef NDEBUG
+ std::string OrigI;
+#endif
+ DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+ DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+
+ if (Instruction *Result = visit(*I)) {
+ ++NumCombined;
+ // Should we replace the old instruction with a new one?
+ if (Result != I) {
+ DEBUG(errs() << "IC: Old = " << *I << '\n'
+ << " New = " << *Result << '\n');
+
+ // Everything uses the new instruction now.
+ I->replaceAllUsesWith(Result);
+
+ // Push the new instruction and any users onto the worklist.
+ Worklist.Add(Result);
+ Worklist.AddUsersToWorkList(*Result);
+
+ // Move the name to the new instruction first.
+ Result->takeName(I);
+
+ // Insert the new instruction into the basic block...
+ BasicBlock *InstParent = I->getParent();
+ BasicBlock::iterator InsertPos = I;
+
+ if (!isa<PHINode>(Result)) // If combining a PHI, don't insert into the
+ while (isa<PHINode>(InsertPos)) // middle of a block of PHIs.
+ ++InsertPos;
+
+ InstParent->getInstList().insert(InsertPos, Result);
+
+ EraseInstFromFunction(*I);
+ } else {
+#ifndef NDEBUG
+ DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ << " New = " << *I << '\n');
+#endif
+
+ // If the instruction was modified, it's possible that it is now dead.
+ // If so, remove it.
+ if (isInstructionTriviallyDead(I)) {
+ EraseInstFromFunction(*I);
+ } else {
+ Worklist.Add(I);
+ Worklist.AddUsersToWorkList(*I);
+ }
+ }
+ MadeIRChange = true;
+ }
+ }
+
+ Worklist.Zap();
+ return MadeIRChange;
+}
+
+
+bool InstCombiner::runOnFunction(Function &F) {
+ MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ TD = getAnalysisIfAvailable<TargetData>();
+
+
+ /// Builder - This is an IRBuilder that automatically inserts new
+ /// instructions into the worklist when they are created.
+ IRBuilder<true, TargetFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), TargetFolder(TD),
+ InstCombineIRInserter(Worklist));
+ Builder = &TheBuilder;
+
+ bool EverMadeChange = false;
+
+ // Iterate while there is work to do.
+ unsigned Iteration = 0;
+ while (DoOneIteration(F, Iteration++))
+ EverMadeChange = true;
+
+ Builder = 0;
+ return EverMadeChange;
+}
+
+FunctionPass *llvm::createInstructionCombiningPass() {
+ return new InstCombiner();
+}
diff --git a/lib/Transforms/InstCombine/Makefile b/lib/Transforms/InstCombine/Makefile
new file mode 100644
index 0000000..0c488e78
--- /dev/null
+++ b/lib/Transforms/InstCombine/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/InstCombine/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMInstCombine +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp deleted file mode 100644 index 211a6d6..0000000 --- a/lib/Transforms/Instrumentation/BlockProfiling.cpp +++ /dev/null @@ -1,128 +0,0 @@ -//===- BlockProfiling.cpp - Insert counters for block profiling -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass instruments the specified program with counters for basic block or -// function profiling. This is the most basic form of profiling, which can tell -// which blocks are hot, but cannot reliably detect hot paths through the CFG. -// Block profiling counts the number of times each basic block executes, and -// function profiling counts the number of times each function is called. -// -// Note that this implementation is very naive. Control equivalent regions of -// the CFG should not require duplicate counters, but we do put duplicate -// counters in. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Instrumentation.h" -#include "RSProfiling.h" -#include "ProfilingUtils.h" -using namespace llvm; - -namespace { - class FunctionProfiler : public RSProfilers_std { - public: - static char ID; - bool runOnModule(Module &M); - }; -} - -char FunctionProfiler::ID = 0; - -static RegisterPass<FunctionProfiler> -X("insert-function-profiling", - "Insert instrumentation for function profiling"); -static RegisterAnalysisGroup<RSProfilers> XG(X); - -ModulePass *llvm::createFunctionProfilerPass() { - return new FunctionProfiler(); -} - -bool FunctionProfiler::runOnModule(Module &M) { - Function *Main = M.getFunction("main"); - if (Main == 0) { - errs() << "WARNING: cannot insert function profiling into a module" - << " with no main function!\n"; - return false; // No main, no instrumentation! - } - - unsigned NumFunctions = 0; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) - ++NumFunctions; - - const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), - NumFunctions); - GlobalVariable *Counters = - new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "FuncProfCounters"); - - // Instrument all of the functions... - unsigned i = 0; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) - // Insert counter at the start of the function - IncrementCounterInBlock(&I->getEntryBlock(), i++, Counters); - - // Add the initialization call to main. 
- InsertProfilingInitCall(Main, "llvm_start_func_profiling", Counters); - return true; -} - - -namespace { - class BlockProfiler : public RSProfilers_std { - bool runOnModule(Module &M); - public: - static char ID; - }; -} - -char BlockProfiler::ID = 0; -static RegisterPass<BlockProfiler> -Y("insert-block-profiling", "Insert instrumentation for block profiling"); -static RegisterAnalysisGroup<RSProfilers> YG(Y); - -ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); } - -bool BlockProfiler::runOnModule(Module &M) { - Function *Main = M.getFunction("main"); - if (Main == 0) { - errs() << "WARNING: cannot insert block profiling into a module" - << " with no main function!\n"; - return false; // No main, no instrumentation! - } - - unsigned NumBlocks = 0; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) - NumBlocks += I->size(); - - const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks); - GlobalVariable *Counters = - new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "BlockProfCounters"); - - // Instrument all of the blocks... - unsigned i = 0; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (I->isDeclaration()) continue; - for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB) - // Insert counter at the start of the block - IncrementCounterInBlock(BB, i++, Counters); - } - - // Add the initialization call to main. - InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters); - return true; -} - diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 494928e..128bf48 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,7 +1,5 @@ add_llvm_library(LLVMInstrumentation - BlockProfiling.cpp EdgeProfiling.cpp OptimalEdgeProfiling.cpp ProfilingUtils.cpp - RSProfiling.cpp ) diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 0a46fe5..94b0671 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -61,7 +61,7 @@ ModulePass *llvm::createOptimalEdgeProfilerPass() { inline static void printEdgeCounter(ProfileInfo::Edge e, BasicBlock* b, unsigned i) { - DEBUG(errs() << "--Edge Counter for " << (e) << " in " \ + DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \ << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n"); } @@ -120,7 +120,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { unsigned i = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); // Calculate a Maximum Spanning Tree with the edge weights determined by // ProfileEstimator. ProfileEstimator also assign weights to the virtual diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 1679bea..3214c8c 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. 
- if (AI->getType() != Type::getInt32Ty(Context)) { + if (!AI->getType()->isInteger(32)) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp deleted file mode 100644 index c08efc1..0000000 --- a/lib/Transforms/Instrumentation/RSProfiling.cpp +++ /dev/null @@ -1,662 +0,0 @@ -//===- RSProfiling.cpp - Various profiling using random sampling ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// These passes implement a random sampling based profiling. Different methods -// of choosing when to sample are supported, as well as different types of -// profiling. This is done as two passes. The first is a sequence of profiling -// passes which insert profiling into the program, and remember what they -// inserted. -// -// The second stage duplicates all instructions in a function, ignoring the -// profiling code, then connects the two versions togeather at the entry and at -// backedges. At each connection point a choice is made as to whether to jump -// to the profiled code (take a sample) or execute the unprofiled code. -// -// It is highly recommended that after this pass one runs mem2reg and adce -// (instcombine load-vn gdce dse also are good to run afterwards) -// -// This design is intended to make the profiling passes independent of the RS -// framework, but any profiling pass that implements the RSProfiling interface -// is compatible with the rs framework (and thus can be sampled) -// -// TODO: obviously the block and function profiling are almost identical to the -// existing ones, so they can be unified (esp since these passes are valid -// without the rs framework). -// TODO: Fix choice code so that frequency is not hard coded -// -//===----------------------------------------------------------------------===// - -#include "llvm/Pass.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Intrinsics.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Instrumentation.h" -#include "RSProfiling.h" -#include <set> -#include <map> -#include <queue> -using namespace llvm; - -namespace { - enum RandomMeth { - GBV, GBVO, HOSTCC - }; -} - -static cl::opt<RandomMeth> RandomMethod("profile-randomness", - cl::desc("How to randomly choose to profile:"), - cl::values( - clEnumValN(GBV, "global", "global counter"), - clEnumValN(GBVO, "ra_global", - "register allocated global counter"), - clEnumValN(HOSTCC, "rdcc", "cycle counter"), - clEnumValEnd)); - -namespace { - /// NullProfilerRS - The basic profiler that does nothing. It is the default - /// profiler and thus terminates RSProfiler chains. 
It is useful for - /// measuring framework overhead - class NullProfilerRS : public RSProfilers { - public: - static char ID; // Pass identification, replacement for typeid - bool isProfiling(Value* v) { - return false; - } - bool runOnModule(Module &M) { - return false; - } - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - }; -} - -static RegisterAnalysisGroup<RSProfilers> A("Profiling passes"); -static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs", - "Measure profiling framework overhead"); -static RegisterAnalysisGroup<RSProfilers, true> NPT(NP); - -namespace { - /// Chooser - Something that chooses when to make a sample of the profiled code - class Chooser { - public: - /// ProcessChoicePoint - is called for each basic block inserted to choose - /// between normal and sample code - virtual void ProcessChoicePoint(BasicBlock*) = 0; - /// PrepFunction - is called once per function before other work is done. - /// This gives the opertunity to insert new allocas and such. - virtual void PrepFunction(Function*) = 0; - virtual ~Chooser() {} - }; - - //Things that implement sampling policies - //A global value that is read-mod-stored to choose when to sample. - //A sample is taken when the global counter hits 0 - class GlobalRandomCounter : public Chooser { - GlobalVariable* Counter; - Value* ResetValue; - const IntegerType* T; - public: - GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval); - virtual ~GlobalRandomCounter(); - virtual void PrepFunction(Function* F); - virtual void ProcessChoicePoint(BasicBlock* bb); - }; - - //Same is GRC, but allow register allocation of the global counter - class GlobalRandomCounterOpt : public Chooser { - GlobalVariable* Counter; - Value* ResetValue; - AllocaInst* AI; - const IntegerType* T; - public: - GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval); - virtual ~GlobalRandomCounterOpt(); - virtual void PrepFunction(Function* F); - virtual void ProcessChoicePoint(BasicBlock* bb); - }; - - //Use the cycle counter intrinsic as a source of pseudo randomness when - //deciding when to sample. 
- class CycleCounter : public Chooser { - uint64_t rm; - Constant *F; - public: - CycleCounter(Module& m, uint64_t resetmask); - virtual ~CycleCounter(); - virtual void PrepFunction(Function* F); - virtual void ProcessChoicePoint(BasicBlock* bb); - }; - - /// ProfilerRS - Insert the random sampling framework - struct ProfilerRS : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - ProfilerRS() : FunctionPass(&ID) {} - - std::map<Value*, Value*> TransCache; - std::set<BasicBlock*> ChoicePoints; - Chooser* c; - - //Translate and duplicate values for the new profile free version of stuff - Value* Translate(Value* v); - //Duplicate an entire function (with out profiling) - void Duplicate(Function& F, RSProfilers& LI); - //Called once for each backedge, handle the insertion of choice points and - //the interconection of the two versions of the code - void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F); - bool runOnFunction(Function& F); - bool doInitialization(Module &M); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - }; -} - -static RegisterPass<ProfilerRS> -X("insert-rs-profiling-framework", - "Insert random sampling instrumentation framework"); - -char RSProfilers::ID = 0; -char NullProfilerRS::ID = 0; -char ProfilerRS::ID = 0; - -//Local utilities -static void ReplacePhiPred(BasicBlock* btarget, - BasicBlock* bold, BasicBlock* bnew); - -static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc); - -template<class T> -static void recBackEdge(BasicBlock* bb, T& BackEdges, - std::map<BasicBlock*, int>& color, - std::map<BasicBlock*, int>& depth, - std::map<BasicBlock*, int>& finish, - int& time); - -//find the back edges and where they go to -template<class T> -static void getBackEdges(Function& F, T& BackEdges); - - -/////////////////////////////////////// -// Methods of choosing when to profile -/////////////////////////////////////// - -GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t, - uint64_t resetval) : T(t) { - ConstantInt* Init = ConstantInt::get(T, resetval); - ResetValue = Init; - Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, - Init, "RandomSteeringCounter"); -} - -GlobalRandomCounter::~GlobalRandomCounter() {} - -void GlobalRandomCounter::PrepFunction(Function* F) {} - -void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { - BranchInst* t = cast<BranchInst>(bb->getTerminator()); - - //decrement counter - LoadInst* l = new LoadInst(Counter, "counter", t); - - ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, - ConstantInt::get(T, 0), - "countercc"); - - Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), - "counternew", t); - new StoreInst(nv, Counter, t); - t->setCondition(s); - - //reset counter - BasicBlock* oldnext = t->getSuccessor(0); - BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), - "reset", oldnext->getParent(), - oldnext); - TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); - t->setSuccessor(0, resetblock); - new StoreInst(ResetValue, Counter, t2); - ReplacePhiPred(oldnext, bb, resetblock); -} - -GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t, - uint64_t resetval) - : AI(0), T(t) { - ConstantInt* Init = ConstantInt::get(T, resetval); - ResetValue = Init; - Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, - Init, "RandomSteeringCounter"); -} - -GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {} - -void GlobalRandomCounterOpt::PrepFunction(Function* 
F) { - //make a local temporary to cache the global - BasicBlock& bb = F->getEntryBlock(); - BasicBlock::iterator InsertPt = bb.begin(); - AI = new AllocaInst(T, 0, "localcounter", InsertPt); - LoadInst* l = new LoadInst(Counter, "counterload", InsertPt); - new StoreInst(l, AI, InsertPt); - - //modify all functions and return values to restore the local variable to/from - //the global variable - for(Function::iterator fib = F->begin(), fie = F->end(); - fib != fie; ++fib) - for(BasicBlock::iterator bib = fib->begin(), bie = fib->end(); - bib != bie; ++bib) - if (isa<CallInst>(bib)) { - LoadInst* l = new LoadInst(AI, "counter", bib); - new StoreInst(l, Counter, bib); - l = new LoadInst(Counter, "counter", ++bib); - new StoreInst(l, AI, bib--); - } else if (isa<InvokeInst>(bib)) { - LoadInst* l = new LoadInst(AI, "counter", bib); - new StoreInst(l, Counter, bib); - - BasicBlock* bb = cast<InvokeInst>(bib)->getNormalDest(); - BasicBlock::iterator i = bb->getFirstNonPHI(); - l = new LoadInst(Counter, "counter", i); - - bb = cast<InvokeInst>(bib)->getUnwindDest(); - i = bb->getFirstNonPHI(); - l = new LoadInst(Counter, "counter", i); - new StoreInst(l, AI, i); - } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) { - LoadInst* l = new LoadInst(AI, "counter", bib); - new StoreInst(l, Counter, bib); - } -} - -void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { - BranchInst* t = cast<BranchInst>(bb->getTerminator()); - - //decrement counter - LoadInst* l = new LoadInst(AI, "counter", t); - - ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, - ConstantInt::get(T, 0), - "countercc"); - - Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), - "counternew", t); - new StoreInst(nv, AI, t); - t->setCondition(s); - - //reset counter - BasicBlock* oldnext = t->getSuccessor(0); - BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), - "reset", oldnext->getParent(), - oldnext); - TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); - t->setSuccessor(0, resetblock); - new StoreInst(ResetValue, AI, t2); - ReplacePhiPred(oldnext, bb, resetblock); -} - - -CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) { - F = Intrinsic::getDeclaration(&m, Intrinsic::readcyclecounter); -} - -CycleCounter::~CycleCounter() {} - -void CycleCounter::PrepFunction(Function* F) {} - -void CycleCounter::ProcessChoicePoint(BasicBlock* bb) { - BranchInst* t = cast<BranchInst>(bb->getTerminator()); - - CallInst* c = CallInst::Create(F, "rdcc", t); - BinaryOperator* b = - BinaryOperator::CreateAnd(c, - ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm), - "mrdcc", t); - - ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b, - ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0), - "mrdccc"); - - t->setCondition(s); -} - -/////////////////////////////////////// -// Profiling: -/////////////////////////////////////// -bool RSProfilers_std::isProfiling(Value* v) { - if (profcode.find(v) != profcode.end()) - return true; - //else - RSProfilers& LI = getAnalysis<RSProfilers>(); - return LI.isProfiling(v); -} - -void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray) { - // Insert the increment after any alloca or PHI instructions... 
- BasicBlock::iterator InsertPos = BB->getFirstNonPHI(); - while (isa<AllocaInst>(InsertPos)) - ++InsertPos; - - // Create the getelementptr constant expression - std::vector<Constant*> Indices(2); - Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext())); - Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum); - Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray, - &Indices[0], 2); - - // Load, increment and store the value back. - Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos); - profcode.insert(OldVal); - Value *NewVal = BinaryOperator::CreateAdd(OldVal, - ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1), - "NewCounter", InsertPos); - profcode.insert(NewVal); - profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos)); -} - -void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const { - //grab any outstanding profiler, or get the null one - AU.addRequired<RSProfilers>(); -} - -/////////////////////////////////////// -// RS Framework -/////////////////////////////////////// - -Value* ProfilerRS::Translate(Value* v) { - if(TransCache[v]) - return TransCache[v]; - - if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) { - if (bb == &bb->getParent()->getEntryBlock()) - TransCache[bb] = bb; //don't translate entry block - else - TransCache[bb] = BasicBlock::Create(v->getContext(), - "dup_" + bb->getName(), - bb->getParent(), NULL); - return TransCache[bb]; - } else if (Instruction* i = dyn_cast<Instruction>(v)) { - //we have already translated this - //do not translate entry block allocas - if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) { - TransCache[i] = i; - return i; - } else { - //translate this - Instruction* i2 = i->clone(); - if (i->hasName()) - i2->setName("dup_" + i->getName()); - TransCache[i] = i2; - //NumNewInst++; - for (unsigned x = 0; x < i2->getNumOperands(); ++x) - i2->setOperand(x, Translate(i2->getOperand(x))); - return i2; - } - } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) { - TransCache[v] = v; - return v; - } - llvm_unreachable("Value not handled"); - return 0; -} - -void ProfilerRS::Duplicate(Function& F, RSProfilers& LI) -{ - //perform a breadth first search, building up a duplicate of the code - std::queue<BasicBlock*> worklist; - std::set<BasicBlock*> seen; - - //This loop ensures proper BB order, to help performance - for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib) - worklist.push(fib); - while (!worklist.empty()) { - Translate(worklist.front()); - worklist.pop(); - } - - //remember than reg2mem created a new entry block we don't want to duplicate - worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0)); - seen.insert(&F.getEntryBlock()); - - while (!worklist.empty()) { - BasicBlock* bb = worklist.front(); - worklist.pop(); - if(seen.find(bb) == seen.end()) { - BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb)); - BasicBlock::InstListType& instlist = bbtarget->getInstList(); - for (BasicBlock::iterator iib = bb->begin(), iie = bb->end(); - iib != iie; ++iib) { - //NumOldInst++; - if (!LI.isProfiling(&*iib)) { - Instruction* i = cast<Instruction>(Translate(iib)); - instlist.insert(bbtarget->end(), i); - } - } - //updated search state; - seen.insert(bb); - TerminatorInst* ti = bb->getTerminator(); - for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) { - BasicBlock* bbs = ti->getSuccessor(x); - if (seen.find(bbs) == seen.end()) { - worklist.push(bbs); - } - } - } - } -} - -void 
ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) { - //given a backedge from B -> A, and translations A' and B', - //a: insert C and C' - //b: add branches in C to A and A' and in C' to A and A' - //c: mod terminators@B, replace A with C - //d: mod terminators@B', replace A' with C' - //e: mod phis@A for pred B to be pred C - // if multiple entries, simplify to one - //f: mod phis@A' for pred B' to be pred C' - // if multiple entries, simplify to one - //g: for all phis@A with pred C using x - // add in edge from C' using x' - // add in edge from C using x in A' - - //a: - Function::iterator BBN = src; ++BBN; - BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN); - //ChoicePoints.insert(bbC); - BBN = cast<BasicBlock>(Translate(src)); - BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN); - ChoicePoints.insert(bbCp); - - //b: - BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC); - BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)), - ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp); - //c: - { - TerminatorInst* iB = src->getTerminator(); - for (unsigned x = 0; x < iB->getNumSuccessors(); ++x) - if (iB->getSuccessor(x) == dst) - iB->setSuccessor(x, bbC); - } - //d: - { - TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator())); - for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x) - if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst))) - iBp->setSuccessor(x, bbCp); - } - //e: - ReplacePhiPred(dst, src, bbC); - //src could be a switch, in which case we are replacing several edges with one - //thus collapse those edges int the Phi - CollapsePhi(dst, bbC); - //f: - ReplacePhiPred(cast<BasicBlock>(Translate(dst)), - cast<BasicBlock>(Translate(src)),bbCp); - CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp); - //g: - for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie; - ++ib) - if (PHINode* phi = dyn_cast<PHINode>(&*ib)) { - for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x) - if(bbC == phi->getIncomingBlock(x)) { - phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp); - cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x), - bbC); - } - phi->removeIncomingValue(bbC); - } -} - -bool ProfilerRS::runOnFunction(Function& F) { - if (!F.isDeclaration()) { - std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges; - RSProfilers& LI = getAnalysis<RSProfilers>(); - - getBackEdges(F, BackEdges); - Duplicate(F, LI); - //assume that stuff worked. now connect the duplicated basic blocks - //with the originals in such a way as to preserve ssa. yuk! 
- for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator - ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib) - ProcessBackEdge(ib->first, ib->second, F); - - //oh, and add the edge from the reg2mem created entry node to the - //duplicated second node - TerminatorInst* T = F.getEntryBlock().getTerminator(); - ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0), - cast<BasicBlock>( - Translate(T->getSuccessor(0))), - ConstantInt::get(Type::getInt1Ty(F.getContext()), true))); - - //do whatever is needed now that the function is duplicated - c->PrepFunction(&F); - - //add entry node to choice points - ChoicePoints.insert(&F.getEntryBlock()); - - for (std::set<BasicBlock*>::iterator - ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii) - c->ProcessChoicePoint(*ii); - - ChoicePoints.clear(); - TransCache.clear(); - - return true; - } - return false; -} - -bool ProfilerRS::doInitialization(Module &M) { - switch (RandomMethod) { - case GBV: - c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()), - (1 << 14) - 1); - break; - case GBVO: - c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()), - (1 << 14) - 1); - break; - case HOSTCC: - c = new CycleCounter(M, (1 << 14) - 1); - break; - }; - return true; -} - -void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<RSProfilers>(); - AU.addRequiredID(DemoteRegisterToMemoryID); -} - -/////////////////////////////////////// -// Utilities: -/////////////////////////////////////// -static void ReplacePhiPred(BasicBlock* btarget, - BasicBlock* bold, BasicBlock* bnew) { - for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end(); - ib != ie; ++ib) - if (PHINode* phi = dyn_cast<PHINode>(&*ib)) { - for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x) - if(bold == phi->getIncomingBlock(x)) - phi->setIncomingBlock(x, bnew); - } -} - -static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) { - for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end(); - ib != ie; ++ib) - if (PHINode* phi = dyn_cast<PHINode>(&*ib)) { - std::map<BasicBlock*, Value*> counter; - for(unsigned i = 0; i < phi->getNumIncomingValues(); ) { - if (counter[phi->getIncomingBlock(i)]) { - assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]); - phi->removeIncomingValue(i, false); - } else { - counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i); - ++i; - } - } - } -} - -template<class T> -static void recBackEdge(BasicBlock* bb, T& BackEdges, - std::map<BasicBlock*, int>& color, - std::map<BasicBlock*, int>& depth, - std::map<BasicBlock*, int>& finish, - int& time) -{ - color[bb] = 1; - ++time; - depth[bb] = time; - TerminatorInst* t= bb->getTerminator(); - for(unsigned i = 0; i < t->getNumSuccessors(); ++i) { - BasicBlock* bbnew = t->getSuccessor(i); - if (color[bbnew] == 0) - recBackEdge(bbnew, BackEdges, color, depth, finish, time); - else if (color[bbnew] == 1) { - BackEdges.insert(std::make_pair(bb, bbnew)); - //NumBackEdges++; - } - } - color[bb] = 2; - ++time; - finish[bb] = time; -} - - - -//find the back edges and where they go to -template<class T> -static void getBackEdges(Function& F, T& BackEdges) { - std::map<BasicBlock*, int> color; - std::map<BasicBlock*, int> depth; - std::map<BasicBlock*, int> finish; - int time = 0; - recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time); - DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n"); -} - - -//Creation functions -ModulePass* llvm::createNullProfilerRSPass() { - 
return new NullProfilerRS(); -} - -FunctionPass* llvm::createRSProfilingPass() { - return new ProfilerRS(); -} diff --git a/lib/Transforms/Instrumentation/RSProfiling.h b/lib/Transforms/Instrumentation/RSProfiling.h deleted file mode 100644 index 8bbe7c7..0000000 --- a/lib/Transforms/Instrumentation/RSProfiling.h +++ /dev/null @@ -1,31 +0,0 @@ -//===- RSProfiling.h - Various profiling using random sampling ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// See notes in RSProfiling.cpp -// -//===----------------------------------------------------------------------===// -#include "llvm/Transforms/RSProfiling.h" -#include <set> - -namespace llvm { - /// RSProfilers_std - a simple support class for profilers that handles most - /// of the work of chaining and tracking inserted code. - struct RSProfilers_std : public RSProfilers { - static char ID; - std::set<Value*> profcode; - // Lookup up values in profcode - virtual bool isProfiling(Value* v); - // handles required chaining - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - // places counter updates in basic blocks and recordes added instructions in - // profcode - void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray); - }; -} diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 025d02a..ea4a115 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello +PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello include $(LEVEL)/Makefile.config diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp index e58fa63..cf5e8c0 100644 --- a/lib/Transforms/Scalar/ABCD.cpp +++ b/lib/Transforms/Scalar/ABCD.cpp @@ -451,7 +451,7 @@ bool ABCD::runOnFunction(Function &F) { modified = false; createSSI(F); executeABCD(F); - DEBUG(inequality_graph.printGraph(errs(), F)); + DEBUG(inequality_graph.printGraph(dbgs(), F)); removePhis(); inequality_graph.clear(); diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 37f383f..5a49841 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -62,8 +62,7 @@ bool ADCE::runOnFunction(Function& F) { // Propagate liveness backwards to operands. 
while (!worklist.empty()) { - Instruction* curr = worklist.back(); - worklist.pop_back(); + Instruction* curr = worklist.pop_back_val(); for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end(); OI != OE; ++OI) diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 5a92399..683c1c2 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -9,7 +9,6 @@ add_llvm_library(LLVMScalarOpts GEPSplitter.cpp GVN.cpp IndVarSimplify.cpp - InstructionCombining.cpp JumpThreading.cpp LICM.cpp LoopDeletion.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 372616c..9c1b440 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -237,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); - DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); + DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. @@ -251,7 +251,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); - DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n"); + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } @@ -294,7 +294,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { } BB->eraseFromParent(); - DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n"); + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } @@ -591,7 +591,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // If all the instructions matched are already in this BB, don't do anything. if (!AnyNonLocal) { - DEBUG(errs() << "CGP: Found local addrmode: " << AddrMode << "\n"); + DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -606,12 +606,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // computation. 
Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { - DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt); } else { - DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 1cfde8f..320afa1 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -52,9 +52,9 @@ namespace { bool runOnBasicBlock(BasicBlock &BB); bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep); bool handleEndBlock(BasicBlock &BB); - bool RemoveUndeadPointers(Value* Ptr, uint64_t killPointerSize, - BasicBlock::iterator& BBI, - SmallPtrSet<Value*, 64>& deadPointers); + bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize, + BasicBlock::iterator &BBI, + SmallPtrSet<Value*, 64> &deadPointers); void DeleteDeadInstruction(Instruction *I, SmallPtrSet<Value*, 64> *deadPointers = 0); @@ -70,6 +70,8 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } + + unsigned getPointerSize(Value *V) const; }; } @@ -173,7 +175,7 @@ static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2, } bool DSE::runOnBasicBlock(BasicBlock &BB) { - MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); + MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); bool MadeChange = false; @@ -355,7 +357,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - Value* killPointer = 0; + Value *killPointer = 0; uint64_t killPointerSize = ~0UL; // If we encounter a use of the pointer, it is no longer considered dead @@ -371,14 +373,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } killPointer = L->getPointerOperand(); - } else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) { + } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) { killPointer = V->getOperand(0); } else if (isa<MemTransferInst>(BBI) && isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) { killPointer = cast<MemTransferInst>(BBI)->getSource(); killPointerSize = cast<ConstantInt>( cast<MemTransferInst>(BBI)->getLength())->getZExtValue(); - } else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) { + } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { deadPointers.erase(A); // Dead alloca's can be DCE'd when we reach them @@ -412,23 +414,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { deadPointers.clear(); return MadeChange; } - - // Get size information for the alloca - unsigned pointerSize = ~0U; - if (TD) { - if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { - if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD->getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast<PointerType>( - cast<Argument>(*I)->getType()); - pointerSize = TD->getTypeAllocSize(PT->getElementType()); - } - } - + // See if the call site touches it - AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, pointerSize); + AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, + 
getPointerSize(*I)); if (A == AliasAnalysis::ModRef) modRef++; @@ -469,11 +458,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { /// RemoveUndeadPointers - check for uses of a pointer that make it /// undead when scanning for dead stores to alloca's. -bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, +bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize, BasicBlock::iterator &BBI, - SmallPtrSet<Value*, 64>& deadPointers) { + SmallPtrSet<Value*, 64> &deadPointers) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - + // If the kill pointer can be easily reduced to an alloca, // don't bother doing extraneous AA queries. if (deadPointers.count(killPointer)) { @@ -488,32 +477,19 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, bool MadeChange = false; SmallVector<Value*, 16> undead; - + for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(), - E = deadPointers.end(); I != E; ++I) { - // Get size information for the alloca. - unsigned pointerSize = ~0U; - if (TD) { - if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { - if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD->getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType()); - pointerSize = TD->getTypeAllocSize(PT->getElementType()); - } - } - + E = deadPointers.end(); I != E; ++I) { // See if this pointer could alias it - AliasAnalysis::AliasResult A = AA.alias(*I, pointerSize, + AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I), killPointer, killPointerSize); // If it must-alias and a store, we can delete it if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) { - StoreInst* S = cast<StoreInst>(BBI); + StoreInst *S = cast<StoreInst>(BBI); // Remove it! - BBI++; + ++BBI; DeleteDeadInstruction(S, &deadPointers); NumFastStores++; MadeChange = true; @@ -547,9 +523,8 @@ void DSE::DeleteDeadInstruction(Instruction *I, // Before we touch this instruction, remove it from memdep! MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>(); - while (!NowDeadInsts.empty()) { - Instruction *DeadInst = NowDeadInsts.back(); - NowDeadInsts.pop_back(); + do { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); ++NumFastOther; @@ -573,5 +548,20 @@ void DSE::DeleteDeadInstruction(Instruction *I, DeadInst->eraseFromParent(); if (ValueSet) ValueSet->erase(DeadInst); + } while (!NowDeadInsts.empty()); +} + +unsigned DSE::getPointerSize(Value *V) const { + if (TD) { + if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { + // Get size information for the alloca + if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) + return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); + } else { + assert(isa<Argument>(V) && "Expected AllocaInst or Argument!"); + const PointerType *PT = cast<PointerType>(V->getType()); + return TD->getTypeAllocSize(PT->getElementType()); + } } + return ~0U; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 612b415..ac0d850 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -829,7 +829,7 @@ SpeculationFailure: SmallVector<BasicBlock*, 32> BBWorklist; BBWorklist.push_back(BB); - while (!BBWorklist.empty()) { + do { BasicBlock *Entry = BBWorklist.pop_back_val(); // Note that this sets blocks to 0 (unavailable) if they happen to not // already be in FullyAvailableBlocks. This is safe. 
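Several hunks in this region make the same two mechanical cleanups: a back()/pop_back() pair on a worklist becomes a single pop_back_val(), and loops whose worklist is provably non-empty on entry (ADCE's liveness scan, DSE's DeleteDeadInstruction, the GVN block scan here) become do/while so the emptiness test is skipped before the first iteration. DSE additionally factors the duplicated alloca-size computation out into the new getPointerSize() helper rather than repeating it at both query sites. Below is a minimal standalone sketch of the worklist idiom, assuming a plain std::vector plus a hypothetical free-function pop_back_val in place of LLVM's SmallVector member; the toy graph and all names are illustrative only, not code from this commit.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for SmallVector::pop_back_val(): remove and
// return the last element in one step instead of back() + pop_back().
template <typename T>
static T pop_back_val(std::vector<T> &V) {
  T X = V.back();
  V.pop_back();
  return X;
}

int main() {
  // Toy successor lists for a 4-node graph: 0 -> {1,2}, 1 -> {3}, 2 -> {3}.
  std::vector<std::vector<int> > Succs(4);
  Succs[0].push_back(1); Succs[0].push_back(2);
  Succs[1].push_back(3);
  Succs[2].push_back(3);

  std::vector<bool> Seen(4, false);
  std::vector<int> Worklist;
  Worklist.push_back(0);
  Seen[0] = true;

  // The worklist starts non-empty, so a do/while avoids one redundant
  // emptiness test, the same shape as the GVN change above.
  do {
    int N = pop_back_val(Worklist);
    for (unsigned i = 0, e = Succs[N].size(); i != e; ++i)
      if (!Seen[Succs[N][i]]) {
        Seen[Succs[N][i]] = true;
        Worklist.push_back(Succs[N][i]);
      }
  } while (!Worklist.empty());

  for (int i = 0; i != 4; ++i)
    printf("node %d reachable: %d\n", i, Seen[i] ? 1 : 0);
  return 0;
}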
@@ -841,7 +841,7 @@ SpeculationFailure: for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) BBWorklist.push_back(*I); - } + } while (!BBWorklist.empty()); return false; } @@ -1022,7 +1022,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, // FIXME: Study to see if/when this happens. if (LoadOffset == StoreOffset) { #if 0 - errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" + dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" << "Base = " << *StoreBase << "\n" << "Store Ptr = " << *WritePtr << "\n" << "Store Offs = " << StoreOffset << "\n" @@ -1053,7 +1053,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, } if (isAAFailure) { #if 0 - errs() << "STORE LOAD DEP WITH COMMON BASE:\n" + dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n" << "Base = " << *StoreBase << "\n" << "Store Ptr = " << *WritePtr << "\n" << "Store Offs = " << StoreOffset << "\n" @@ -1362,7 +1362,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, SmallVector<NonLocalDepResult, 64> Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); - //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " + //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " // << Deps.size() << *LI << '\n'); // If we had to process more than one hundred blocks to find the @@ -1375,9 +1375,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // clobber in the current block. Reject this early. if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { DEBUG( - errs() << "GVN: non-local load "; - WriteAsOperand(errs(), LI); - errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; + dbgs() << "GVN: non-local load "; + WriteAsOperand(dbgs(), LI); + dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; ); return false; } @@ -1500,7 +1500,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // load, then it is fully redundant and we can use PHI insertion to compute // its value. Insert PHIs and remove the fully redundant value now. if (UnavailableBlocks.empty()) { - DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); + DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, @@ -1614,7 +1614,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // We don't currently handle critical edges :( if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" << UnavailablePred->getName() << "': " << *LI << '\n'); return false; } @@ -1646,7 +1646,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // we fail PRE. if (LoadPtr == 0) { assert(NewInsts.empty() && "Shouldn't insert insts on failure"); - DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " + DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getOperand(0) << "\n"); return false; } @@ -1679,9 +1679,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. 
- DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); + DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); DEBUG(if (!NewInsts.empty()) - errs() << "INSERTED " << NewInsts.size() << " INSTS: " + dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " << *NewInsts.back() << '\n'); Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, @@ -1752,7 +1752,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { } if (AvailVal) { - DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' + DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' << *AvailVal << '\n' << *L << "\n\n\n"); // Replace the load! @@ -1766,10 +1766,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { DEBUG( // fast print dep, using operator<< on instruction would be too slow - errs() << "GVN: load "; - WriteAsOperand(errs(), L); + dbgs() << "GVN: load "; + WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); - errs() << " is clobbered by " << *I << '\n'; + dbgs() << " is clobbered by " << *I << '\n'; ); return false; } @@ -1793,7 +1793,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { if (StoredVal == 0) return false; - DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal << '\n' << *L << "\n\n\n"); } else @@ -1822,7 +1822,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { if (AvailableVal == 0) return false; - DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal << "\n" << *L << "\n\n\n"); } else @@ -1990,7 +1990,7 @@ bool GVN::runOnFunction(Function& F) { unsigned Iteration = 0; while (ShouldContinue) { - DEBUG(errs() << "GVN iteration: " << Iteration << "\n"); + DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); Changed |= ShouldContinue; ++Iteration; @@ -2038,7 +2038,7 @@ bool GVN::processBlock(BasicBlock *BB) { for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) { - DEBUG(errs() << "GVN removed: " << **I << '\n'); + DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); @@ -2196,7 +2196,7 @@ bool GVN::performPRE(Function &F) { MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); - DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n'); + DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 3aa4fd3..ce1307c 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -182,7 +182,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, else Opcode = ICmpInst::ICMP_EQ; - DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n" + DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" << " LHS:" << *CmpIndVar << '\n' << " op:\t" << (Opcode == ICmpInst::ICMP_NE ? 
"!=" : "==") << "\n" @@ -273,7 +273,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' + DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); @@ -401,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumInserted; Changed = true; - DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n'); + DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); // Now that the official induction variable is established, reinsert // the old canonical-looking variable after it so that the IR remains @@ -438,7 +438,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); // Clean up dead instructions. - DeleteDeadPHIs(L->getHeader()); + Changed |= DeleteDeadPHIs(L->getHeader()); // Check a post-condition. assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!"); return Changed; @@ -506,7 +506,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' << " into = " << *NewVal << "\n"); ++NumRemoved; Changed = true; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp deleted file mode 100644 index 516d72e..0000000 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ /dev/null @@ -1,13736 +0,0 @@ -//===- InstructionCombining.cpp - Combine multiple instructions -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// InstructionCombining - Combine instructions to form fewer, simple -// instructions. This pass does not modify the CFG. This pass is where -// algebraic simplification happens. -// -// This pass combines things like: -// %Y = add i32 %X, 1 -// %Z = add i32 %Y, 1 -// into: -// %Z = add i32 %X, 2 -// -// This is a simple worklist driven algorithm. -// -// This pass guarantees that the following canonicalizations are performed on -// the program: -// 1. If a binary operator has a constant operand, it is moved to the RHS -// 2. Bitwise operators with constant operands are always grouped so that -// shifts are performed first, then or's, then and's, then xor's. -// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible -// 4. All cmp instructions on boolean values are replaced with logical ops -// 5. add X, X is represented as (X*2) => (X << 1) -// 6. Multiplies with a power-of-two constant argument are transformed into -// shifts. -// ... etc. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "instcombine" -#include "llvm/Transforms/Scalar.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Operator.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/ConstantRange.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Support/TargetFolder.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include <algorithm> -#include <climits> -using namespace llvm; -using namespace llvm::PatternMatch; - -STATISTIC(NumCombined , "Number of insts combined"); -STATISTIC(NumConstProp, "Number of constant folds"); -STATISTIC(NumDeadInst , "Number of dead inst eliminated"); -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); -STATISTIC(NumSunkInst , "Number of instructions sunk"); - -/// SelectPatternFlavor - We can match a variety of different patterns for -/// select operations. -enum SelectPatternFlavor { - SPF_UNKNOWN = 0, - SPF_SMIN, SPF_UMIN, - SPF_SMAX, SPF_UMAX - //SPF_ABS - TODO. -}; - -namespace { - /// InstCombineWorklist - This is the worklist management logic for - /// InstCombine. - class InstCombineWorklist { - SmallVector<Instruction*, 256> Worklist; - DenseMap<Instruction*, unsigned> WorklistMap; - - void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT - InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT - public: - InstCombineWorklist() {} - - bool isEmpty() const { return Worklist.empty(); } - - /// Add - Add the specified instruction to the worklist if it isn't already - /// in it. - void Add(Instruction *I) { - if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { - DEBUG(errs() << "IC: ADD: " << *I << '\n'); - Worklist.push_back(I); - } - } - - void AddValue(Value *V) { - if (Instruction *I = dyn_cast<Instruction>(V)) - Add(I); - } - - /// AddInitialGroup - Add the specified batch of stuff in reverse order. - /// which should only be done when the worklist is empty and when the group - /// has no duplicates. - void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { - assert(Worklist.empty() && "Worklist must be empty to add initial group"); - Worklist.reserve(NumEntries+16); - DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); - for (; NumEntries; --NumEntries) { - Instruction *I = List[NumEntries-1]; - WorklistMap.insert(std::make_pair(I, Worklist.size())); - Worklist.push_back(I); - } - } - - // Remove - remove I from the worklist if it exists. - void Remove(Instruction *I) { - DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I); - if (It == WorklistMap.end()) return; // Not in worklist. 
- - // Don't bother moving everything down, just null out the slot. - Worklist[It->second] = 0; - - WorklistMap.erase(It); - } - - Instruction *RemoveOne() { - Instruction *I = Worklist.back(); - Worklist.pop_back(); - WorklistMap.erase(I); - return I; - } - - /// AddUsersToWorkList - When an instruction is simplified, add all users of - /// the instruction to the work lists because they might get more simplified - /// now. - /// - void AddUsersToWorkList(Instruction &I) { - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); - UI != UE; ++UI) - Add(cast<Instruction>(*UI)); - } - - - /// Zap - check that the worklist is empty and nuke the backing store for - /// the map if it is large. - void Zap() { - assert(WorklistMap.empty() && "Worklist empty, but map not?"); - - // Do an explicit clear, this shrinks the map if needed. - WorklistMap.clear(); - } - }; -} // end anonymous namespace. - - -namespace { - /// InstCombineIRInserter - This is an IRBuilder insertion helper that works - /// just like the normal insertion helper, but also adds any new instructions - /// to the instcombine worklist. - class InstCombineIRInserter : public IRBuilderDefaultInserter<true> { - InstCombineWorklist &Worklist; - public: - InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - - void InsertHelper(Instruction *I, const Twine &Name, - BasicBlock *BB, BasicBlock::iterator InsertPt) const { - IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); - Worklist.Add(I); - } - }; -} // end anonymous namespace - - -namespace { - class InstCombiner : public FunctionPass, - public InstVisitor<InstCombiner, Instruction*> { - TargetData *TD; - bool MustPreserveLCSSA; - bool MadeIRChange; - public: - /// Worklist - All of the instructions that need to be simplified. - InstCombineWorklist Worklist; - - /// Builder - This is an IRBuilder that automatically inserts new - /// instructions into the worklist when they are created. - typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; - BuilderTy *Builder; - - static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} - - LLVMContext *Context; - LLVMContext *getContext() const { return Context; } - - public: - virtual bool runOnFunction(Function &F); - - bool DoOneIteration(Function &F, unsigned ItNum); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(LCSSAID); - AU.setPreservesCFG(); - } - - TargetData *getTargetData() const { return TD; } - - // Visitation implementation - Implement instruction combining for different - // instruction types. 
The semantics are as follows: - // Return Value: - // null - No change was made - // I - Change was made, I is still valid, I may be dead though - // otherwise - Change was made, replace I with returned instruction - // - Instruction *visitAdd(BinaryOperator &I); - Instruction *visitFAdd(BinaryOperator &I); - Value *OptimizePointerDifference(Value *LHS, Value *RHS, const Type *Ty); - Instruction *visitSub(BinaryOperator &I); - Instruction *visitFSub(BinaryOperator &I); - Instruction *visitMul(BinaryOperator &I); - Instruction *visitFMul(BinaryOperator &I); - Instruction *visitURem(BinaryOperator &I); - Instruction *visitSRem(BinaryOperator &I); - Instruction *visitFRem(BinaryOperator &I); - bool SimplifyDivRemOfSelect(BinaryOperator &I); - Instruction *commonRemTransforms(BinaryOperator &I); - Instruction *commonIRemTransforms(BinaryOperator &I); - Instruction *commonDivTransforms(BinaryOperator &I); - Instruction *commonIDivTransforms(BinaryOperator &I); - Instruction *visitUDiv(BinaryOperator &I); - Instruction *visitSDiv(BinaryOperator &I); - Instruction *visitFDiv(BinaryOperator &I); - Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); - Instruction *visitAnd(BinaryOperator &I); - Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); - Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C); - Instruction *visitOr (BinaryOperator &I); - Instruction *visitXor(BinaryOperator &I); - Instruction *visitShl(BinaryOperator &I); - Instruction *visitAShr(BinaryOperator &I); - Instruction *visitLShr(BinaryOperator &I); - Instruction *commonShiftTransforms(BinaryOperator &I); - Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, - Constant *RHSC); - Instruction *visitFCmpInst(FCmpInst &I); - Instruction *visitICmpInst(ICmpInst &I); - Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); - Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHS, - ConstantInt *RHS); - Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, - ConstantInt *DivRHS); - Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, Value *TheAdd); - Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, Instruction &I); - Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, - BinaryOperator &I); - Instruction *commonCastTransforms(CastInst &CI); - Instruction *commonIntCastTransforms(CastInst &CI); - Instruction *commonPointerCastTransforms(CastInst &CI); - Instruction *visitTrunc(TruncInst &CI); - Instruction *visitZExt(ZExtInst &CI); - Instruction *visitSExt(SExtInst &CI); - Instruction *visitFPTrunc(FPTruncInst &CI); - Instruction *visitFPExt(CastInst &CI); - Instruction *visitFPToUI(FPToUIInst &FI); - Instruction *visitFPToSI(FPToSIInst &FI); - Instruction *visitUIToFP(CastInst &CI); - Instruction *visitSIToFP(CastInst &CI); - Instruction *visitPtrToInt(PtrToIntInst &CI); - Instruction *visitIntToPtr(IntToPtrInst &CI); - Instruction *visitBitCast(BitCastInst &CI); - Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI); - Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); - Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, - Value *A, Value *B, Instruction &Outer, - SelectPatternFlavor SPF2, Value 
*C); - Instruction *visitSelectInst(SelectInst &SI); - Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); - Instruction *visitCallInst(CallInst &CI); - Instruction *visitInvokeInst(InvokeInst &II); - - Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); - Instruction *visitPHINode(PHINode &PN); - Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); - Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitFree(Instruction &FI); - Instruction *visitLoadInst(LoadInst &LI); - Instruction *visitStoreInst(StoreInst &SI); - Instruction *visitBranchInst(BranchInst &BI); - Instruction *visitSwitchInst(SwitchInst &SI); - Instruction *visitInsertElementInst(InsertElementInst &IE); - Instruction *visitExtractElementInst(ExtractElementInst &EI); - Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); - Instruction *visitExtractValueInst(ExtractValueInst &EV); - - // visitInstruction - Specify what to return for unhandled instructions... - Instruction *visitInstruction(Instruction &I) { return 0; } - - private: - Instruction *visitCallSite(CallSite CS); - bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS); - Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, - bool DoXform = true); - bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - DbgDeclareInst *hasOneUsePlusDeclare(Value *V); - - - public: - // InsertNewInstBefore - insert an instruction New before instruction Old - // in the program. Add the new instruction to the worklist. - // - Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { - assert(New && New->getParent() == 0 && - "New instruction already inserted into a basic block!"); - BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst - Worklist.Add(New); - return New; - } - - // ReplaceInstUsesWith - This method is to be used when an instruction is - // found to be dead, replacable with another preexisting expression. Here - // we add all uses of I to the worklist, replace all uses of I with the new - // value, then return I, so that the inst combiner will know that I was - // modified. - // - Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { - Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. - - // If we are replacing the instruction with itself, this must be in a - // segment of unreachable code, so just clobber the instruction. - if (&I == V) - V = UndefValue::get(I.getType()); - - I.replaceAllUsesWith(V); - return &I; - } - - // EraseInstFromFunction - When dealing with an instruction that has side - // effects or produces a void value, we can't rely on DCE to delete the - // instruction. Instead, visit methods should return the value returned by - // this function. - Instruction *EraseInstFromFunction(Instruction &I) { - DEBUG(errs() << "IC: ERASE " << I << '\n'); - - assert(I.use_empty() && "Cannot erase instruction that is used!"); - // Make sure that we reprocess all operands now that we reduced their - // use counts. 
- if (I.getNumOperands() < 8) { - for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) - if (Instruction *Op = dyn_cast<Instruction>(*i)) - Worklist.Add(Op); - } - Worklist.Remove(&I); - I.eraseFromParent(); - MadeIRChange = true; - return 0; // Don't do anything with FI - } - - void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0) const { - return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); - } - - bool MaskedValueIsZero(Value *V, const APInt &Mask, - unsigned Depth = 0) const { - return llvm::MaskedValueIsZero(V, Mask, TD, Depth); - } - unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const { - return llvm::ComputeNumSignBits(Op, TD, Depth); - } - - private: - - /// SimplifyCommutative - This performs a few simplifications for - /// commutative operators. - bool SimplifyCommutative(BinaryOperator &I); - - /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value - /// based on the demanded bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth=0); - - /// SimplifyDemandedInstructionBits - Inst is an integer instruction that - /// SimplifyDemandedBits knows about. See if the instruction has any - /// properties that allow us to simplify its operands. - bool SimplifyDemandedInstructionBits(Instruction &Inst); - - Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, unsigned Depth = 0); - - // FoldOpIntoPhi - Given a binary operator, cast instruction, or select - // which has a PHI node as operand #0, see if we can fold the instruction - // into the PHI (which is only possible if all operands to the PHI are - // constants). - // - // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms - // that would normally be unprofitable because they strongly encourage jump - // threading. - Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); - - // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" - // operator and they all are only used by the PHI, PHI together their - // inputs, and do the operation once, to the result of the PHI. 
- Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); - Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); - Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); - Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - - - Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, - ConstantInt *AndRHS, BinaryOperator &TheAnd); - - Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, - bool isSub, Instruction &I); - Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, Instruction &IB); - Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); - Instruction *MatchBSwap(BinaryOperator &I); - bool SimplifyStoreAtEndOfBlock(StoreInst &SI); - Instruction *SimplifyMemTransfer(MemIntrinsic *MI); - Instruction *SimplifyMemSet(MemSetInst *MI); - - - Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); - - bool CanEvaluateInDifferentType(Value *V, const Type *Ty, - unsigned CastOpc, int &NumCastsRemoved); - unsigned GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign = 0); - - }; -} // end anonymous namespace - -char InstCombiner::ID = 0; -static RegisterPass<InstCombiner> -X("instcombine", "Combine redundant instructions"); - -// getComplexity: Assign a complexity or rank value to LLVM Values... -// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst -static unsigned getComplexity(Value *V) { - if (isa<Instruction>(V)) { - if (BinaryOperator::isNeg(V) || - BinaryOperator::isFNeg(V) || - BinaryOperator::isNot(V)) - return 3; - return 4; - } - if (isa<Argument>(V)) return 3; - return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; -} - -// isOnlyUse - Return true if this instruction will be deleted if we stop using -// it. -static bool isOnlyUse(Value *V) { - return V->hasOneUse() || isa<Constant>(V); -} - -// getPromotedType - Return the specified type promoted as it would be to pass -// though a va_arg area... -static const Type *getPromotedType(const Type *Ty) { - if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) { - if (ITy->getBitWidth() < 32) - return Type::getInt32Ty(Ty->getContext()); - } - return Ty; -} - -/// ShouldChangeType - Return true if it is desirable to convert a computation -/// from 'From' to 'To'. We don't want to convert from a legal to an illegal -/// type for example, or from a smaller to a larger illegal type. -static bool ShouldChangeType(const Type *From, const Type *To, - const TargetData *TD) { - assert(isa<IntegerType>(From) && isa<IntegerType>(To)); - - // If we don't have TD, we don't know if the source/dest are legal. - if (!TD) return false; - - unsigned FromWidth = From->getPrimitiveSizeInBits(); - unsigned ToWidth = To->getPrimitiveSizeInBits(); - bool FromLegal = TD->isLegalInteger(FromWidth); - bool ToLegal = TD->isLegalInteger(ToWidth); - - // If this is a legal integer from type, and the result would be an illegal - // type, don't do the transformation. - if (FromLegal && !ToLegal) - return false; - - // Otherwise, if both are illegal, do not increase the size of the result. We - // do allow things like i160 -> i64, but not i64 -> i160. - if (!FromLegal && !ToLegal && ToWidth > FromWidth) - return false; - - return true; -} - -/// getBitCastOperand - If the specified operand is a CastInst, a constant -/// expression bitcast, or a GetElementPtrInst with all zero indices, return the -/// operand value, otherwise return null. 
-static Value *getBitCastOperand(Value *V) { - if (Operator *O = dyn_cast<Operator>(V)) { - if (O->getOpcode() == Instruction::BitCast) - return O->getOperand(0); - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - if (GEP->hasAllZeroIndices()) - return GEP->getPointerOperand(); - } - return 0; -} - -/// This function is a wrapper around CastInst::isEliminableCastPair. It -/// simply extracts arguments and returns what that function returns. -static Instruction::CastOps -isEliminableCastPair( - const CastInst *CI, ///< The first cast instruction - unsigned opcode, ///< The opcode of the second cast instruction - const Type *DstTy, ///< The target type for the second cast instruction - TargetData *TD ///< The target data for pointer size -) { - - const Type *SrcTy = CI->getOperand(0)->getType(); // A from above - const Type *MidTy = CI->getType(); // B from above - - // Get the opcodes of the two Cast instructions - Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); - Instruction::CastOps secondOp = Instruction::CastOps(opcode); - - unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, - TD ? TD->getIntPtrType(CI->getContext()) : 0); - - // We don't want to form an inttoptr or ptrtoint that converts to an integer - // type that differs from the pointer size. - if ((Res == Instruction::IntToPtr && - (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || - (Res == Instruction::PtrToInt && - (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) - Res = 0; - - return Instruction::CastOps(Res); -} - -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V, - const Type *Ty, TargetData *TD) { - if (V->getType() == Ty || isa<Constant>(V)) return false; - - // If this is another cast that can be eliminated, it isn't codegen either. - if (const CastInst *CI = dyn_cast<CastInst>(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) - return false; - return true; -} - -// SimplifyCommutative - This performs a few simplifications for commutative -// operators: -// -// 1. Order operands such that they are listed from right (least complex) to -// left (most complex). This puts constants before unary operators before -// binary operators. -// -// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2)) -// 3. 
Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) -// -bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { - bool Changed = false; - if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) - Changed = !I.swapOperands(); - - if (!I.isAssociative()) return Changed; - Instruction::BinaryOps Opcode = I.getOpcode(); - if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0))) - if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) { - if (isa<Constant>(I.getOperand(1))) { - Constant *Folded = ConstantExpr::get(I.getOpcode(), - cast<Constant>(I.getOperand(1)), - cast<Constant>(Op->getOperand(1))); - I.setOperand(0, Op->getOperand(0)); - I.setOperand(1, Folded); - return true; - } else if (BinaryOperator *Op1=dyn_cast<BinaryOperator>(I.getOperand(1))) - if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) && - isOnlyUse(Op) && isOnlyUse(Op1)) { - Constant *C1 = cast<Constant>(Op->getOperand(1)); - Constant *C2 = cast<Constant>(Op1->getOperand(1)); - - // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); - Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), - Op1->getOperand(0), - Op1->getName(), &I); - Worklist.Add(New); - I.setOperand(0, New); - I.setOperand(1, Folded); - return true; - } - } - return Changed; -} - -// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction -// if the LHS is a constant zero (which is the 'negate' form). -// -static inline Value *dyn_castNegVal(Value *V) { - if (BinaryOperator::isNeg(V)) - return BinaryOperator::getNegArgument(V); - - // Constants can be considered to be negated values if they can be folded. - if (ConstantInt *C = dyn_cast<ConstantInt>(V)) - return ConstantExpr::getNeg(C); - - if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isInteger()) - return ConstantExpr::getNeg(C); - - return 0; -} - -// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the -// instruction if the LHS is a constant negative zero (which is the 'negate' -// form). -// -static inline Value *dyn_castFNegVal(Value *V) { - if (BinaryOperator::isFNeg(V)) - return BinaryOperator::getFNegArgument(V); - - // Constants can be considered to be negated values if they can be folded. - if (ConstantFP *C = dyn_cast<ConstantFP>(V)) - return ConstantExpr::getFNeg(C); - - if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isFloatingPoint()) - return ConstantExpr::getFNeg(C); - - return 0; -} - -/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, -/// returning the kind and providing the out parameter results if we -/// successfully match. -static SelectPatternFlavor -MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { - SelectInst *SI = dyn_cast<SelectInst>(V); - if (SI == 0) return SPF_UNKNOWN; - - ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); - if (ICI == 0) return SPF_UNKNOWN; - - LHS = ICI->getOperand(0); - RHS = ICI->getOperand(1); - - // (icmp X, Y) ? X : Y - if (SI->getTrueValue() == ICI->getOperand(0) && - SI->getFalseValue() == ICI->getOperand(1)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. 
- case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMAX; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMAX; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMIN; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMIN; - } - } - - // (icmp X, Y) ? Y : X - if (SI->getTrueValue() == ICI->getOperand(1) && - SI->getFalseValue() == ICI->getOperand(0)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMIN; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMIN; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMAX; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMAX; - } - } - - // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) - - return SPF_UNKNOWN; -} - -/// isFreeToInvert - Return true if the specified value is free to invert (apply -/// ~ to). This happens in cases where the ~ can be eliminated. -static inline bool isFreeToInvert(Value *V) { - // ~(~(X)) -> X. - if (BinaryOperator::isNot(V)) - return true; - - // Constants can be considered to be not'ed values. - if (isa<ConstantInt>(V)) - return true; - - // Compares can be inverted if they have a single use. - if (CmpInst *CI = dyn_cast<CmpInst>(V)) - return CI->hasOneUse(); - - return false; -} - -static inline Value *dyn_castNotVal(Value *V) { - // If this is not(not(x)) don't return that this is a not: we want the two - // not's to be folded first. - if (BinaryOperator::isNot(V)) { - Value *Operand = BinaryOperator::getNotArgument(V); - if (!isFreeToInvert(Operand)) - return Operand; - } - - // Constants can be considered to be not'ed values... - if (ConstantInt *C = dyn_cast<ConstantInt>(V)) - return ConstantInt::get(C->getType(), ~C->getValue()); - return 0; -} - - - -// dyn_castFoldableMul - If this value is a multiply that can be folded into -// other computations (because it has a constant operand), return the -// non-constant operand of the multiply, and set CST to point to the multiplier. -// Otherwise, return null. -// -static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { - if (V->hasOneUse() && V->getType()->isInteger()) - if (Instruction *I = dyn_cast<Instruction>(V)) { - if (I->getOpcode() == Instruction::Mul) - if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) - return I->getOperand(0); - if (I->getOpcode() == Instruction::Shl) - if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { - // The multiplier is really 1 << CST. - uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); - uint32_t CSTVal = CST->getLimitedValue(BitWidth); - CST = ConstantInt::get(V->getType()->getContext(), - APInt(BitWidth, 1).shl(CSTVal)); - return I->getOperand(0); - } - } - return 0; -} - -/// AddOne - Add one to a ConstantInt -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, - ConstantInt::get(C->getType(), 1)); -} -/// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C) { - return ConstantExpr::getSub(C, - ConstantInt::get(C->getType(), 1)); -} -/// MultiplyOverflows - True if the multiply can not be expressed in an int -/// this size. 
-static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { - uint32_t W = C1->getBitWidth(); - APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); - if (sign) { - LHSExt.sext(W * 2); - RHSExt.sext(W * 2); - } else { - LHSExt.zext(W * 2); - RHSExt.zext(W * 2); - } - - APInt MulExt = LHSExt * RHSExt; - - if (!sign) - return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); - - APInt Min = APInt::getSignedMinValue(W).sext(W * 2); - APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); - return MulExt.slt(Min) || MulExt.sgt(Max); -} - - -/// ShrinkDemandedConstant - Check to see if the specified operand of the -/// specified instruction is a constant integer. If so, check to see if there -/// are any bits set in the constant that are not demanded. If so, shrink the -/// constant and return true. -static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { - assert(I && "No instruction?"); - assert(OpNo < I->getNumOperands() && "Operand index too large"); - - // If the operand is not a constant integer, nothing to do. - ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo)); - if (!OpC) return false; - - // If there are no bits set that aren't demanded, nothing to do. - Demanded.zextOrTrunc(OpC->getValue().getBitWidth()); - if ((~Demanded & OpC->getValue()) == 0) - return false; - - // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= OpC->getValue(); - I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); - return true; -} - -// ComputeSignedMinMaxValuesFromKnownBits - Given a signed integer type and a -// set of known zero and one bits, compute the maximum and minimum values that -// could have the specified known zero and known one bits, returning them in -// min/max. -static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero, - const APInt& KnownOne, - APInt& Min, APInt& Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && - "KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); - - // The minimum value is when all unknown bits are zeros, EXCEPT for the sign - // bit if it is unknown. - Min = KnownOne; - Max = KnownOne|UnknownBits; - - if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.set(Min.getBitWidth()-1); - Max.clear(Max.getBitWidth()-1); - } -} - -// ComputeUnsignedMinMaxValuesFromKnownBits - Given an unsigned integer type and -// a set of known zero and one bits, compute the maximum and minimum values that -// could have the specified known zero and known one bits, returning them in -// min/max. -static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, - APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && - "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); - - // The minimum value is when the unknown bits are all zeros. - Min = KnownOne; - // The maximum value is when the unknown bits are all ones. - Max = KnownOne|UnknownBits; -} - -/// SimplifyDemandedInstructionBits - Inst is an integer instruction that -/// SimplifyDemandedBits knows about. See if the instruction has any -/// properties that allow us to simplify its operands. 
-bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { - unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); - - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, - KnownZero, KnownOne, 0); - if (V == 0) return false; - if (V == &Inst) return true; - ReplaceInstUsesWith(Inst, V); - return true; -} - -/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the -/// specified instruction operand if possible, updating it in place. It returns -/// true if it made any change and false otherwise. -bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) { - Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, - KnownZero, KnownOne, Depth); - if (NewVal == 0) return false; - U = NewVal; - return true; -} - - -/// SimplifyDemandedUseBits - This function attempts to replace V with a simpler -/// value based on the demanded bits. When this function is called, it is known -/// that only the bits set in DemandedMask of the result of V are ever used -/// downstream. Consequently, depending on the mask and V, it may be possible -/// to replace V with a constant or one of its operands. In such cases, this -/// function does the replacement and returns true. In all other cases, it -/// returns false after analyzing the expression and setting KnownOne and known -/// to be one in the expression. KnownZero contains all the bits that are known -/// to be zero in the expression. These are provided to potentially allow the -/// caller (which might recursively be SimplifyDemandedBits itself) to simplify -/// the expression. KnownOne and KnownZero always follow the invariant that -/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that -/// the bits in KnownOne and KnownZero may only be accurate for those bits set -/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero -/// and KnownOne must all be the same. -/// -/// This returns null if it did not change anything and it permits no -/// simplification. This returns V itself if it did some simplification of V's -/// operands based on the information about what bits are demanded. This returns -/// some other non-null value if it found out that V is equal to another value -/// in the context where the specified bits are demanded, but not for all users. -Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) { - assert(V != 0 && "Null pointer of Value???"); - assert(Depth <= 6 && "Limit Search Depth"); - uint32_t BitWidth = DemandedMask.getBitWidth(); - const Type *VTy = V->getType(); - assert((TD || !isa<PointerType>(VTy)) && - "SimplifyDemandedBits needs to know bit widths!"); - assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVector() || - VTy->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne " - "must have same BitWidth"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // We know all of the bits for a constant! - KnownOne = CI->getValue() & DemandedMask; - KnownZero = ~KnownOne & DemandedMask; - return 0; - } - if (isa<ConstantPointerNull>(V)) { - // We know all of the bits for a constant! 
- KnownOne.clear(); - KnownZero = DemandedMask; - return 0; - } - - KnownZero.clear(); - KnownOne.clear(); - if (DemandedMask == 0) { // Not demanding any bits from V. - if (isa<UndefValue>(V)) - return 0; - return UndefValue::get(VTy); - } - - if (Depth == 6) // Limit search depth. - return 0; - - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; - - Instruction *I = dyn_cast<Instruction>(V); - if (!I) { - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - return 0; // Only analyze instructions. - } - - // If there are multiple uses of this value and we aren't at the root, then - // we can't do any simplifications of the operands, because DemandedMask - // only reflects the bits demanded by *one* of the users. - if (Depth != 0 && !I->hasOneUse()) { - // Despite the fact that we can't simplify this instruction in all User's - // context, we can at least compute the knownzero/knownone bits, and we can - // do simplifications that apply to *just* the one user if we know that - // this instruction has a simpler value in that context. - if (I->getOpcode() == Instruction::And) { - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and' in this - // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); - - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - } else if (I->getOpcode() == Instruction::Or) { - // We can simplify (X|Y) -> X or Y in the user's context if we know that - // only bits from X or Y are demanded. - - // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1); - - // If all of the demanded bits are known zero on one side, return the - // other. These bits cannot contribute to the result of the 'or' in this - // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); - } - - // Compute the KnownZero/KnownOne bits to simplify things downstream. - ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); - return 0; - } - - // If this is the root being simplified, allow it to have multiple uses, - // just set the DemandedMask to all bits so that we can try to simplify the - // operands. 
This allows visitTruncInst (for example) to simplify the - // operand of a trunc without duplicating all the logic below. - if (Depth == 0 && !V->hasOneUse()) - DemandedMask = APInt::getAllOnesValue(BitWidth); - - switch (I->getOpcode()) { - default: - ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - case Instruction::And: - // If either the LHS or the RHS are Zero, the result is zero. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); - - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) - return I; - - // Output known-1 bits are only known if set in both the LHS & RHS. - RHSKnownOne &= LHSKnownOne; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - RHSKnownZero |= LHSKnownZero; - break; - case Instruction::Or: - // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); - - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; - - // Output known-0 bits are only known if clear in both the LHS & RHS. - RHSKnownZero &= LHSKnownZero; - // Output known-1 are known to be set if set in either the LHS | RHS. 
- RHSKnownOne |= LHSKnownOne; - break; - case Instruction::Xor: { - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If all of the demanded bits are known zero on one side, return the other. - // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & RHSKnownZero) == DemandedMask) - return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) - return I->getOperand(1); - - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); - - // If all of the demanded bits are known to be zero on one side or the - // other, turn this into an *inclusive* or. - // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { - Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), - I->getName()); - return InsertNewInstBefore(Or, *I); - } - - // If all of the demanded bits on one side are known, and all of the set - // bits on that side are also known to be set on the other side, turn this - // into an AND, as we know the bits will be cleared. - // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); - return InsertNewInstBefore(And, *I); - } - } - - // If the RHS is a constant, see if we can simplify it. - // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; - - // If our LHS is an 'and' and if it has one use, and if any of the bits we - // are flipping are known to be set, then the xor is just resetting those - // bits to zero. We can just knock out bits from the 'and' and the 'xor', - // simplifying both of them. 
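// Illustrative sketch, not part of this patch: a worked instance of the
// 'and'/'xor' knock-out described above, with hypothetical names. Taking
// y = x | 4 makes bit 2 of y known one, so in (y & 7) ^ 4 the xor flips a
// bit that is known set, i.e. it merely clears it; that bit can therefore
// be knocked out of both constants, leaving y & 3.
#include <cassert>
#include <cstdint>

static void xorKnockoutExample(uint32_t x) {
  uint32_t y = x | 4u;                  // bit 2 of y is known one
  assert(((y & 7u) ^ 4u) == (y & 3u));  // bit 2 removed from the and and xor
}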
- if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) - if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && - isa<ConstantInt>(I->getOperand(1)) && - isa<ConstantInt>(LHSInst->getOperand(1)) && - (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { - ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); - ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); - APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); - - Constant *AndC = - ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); - Instruction *NewAnd = - BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); - InsertNewInstBefore(NewAnd, *I); - - Constant *XorC = - ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); - Instruction *NewXor = - BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); - return InsertNewInstBefore(NewXor, *I); - } - - - RHSKnownZero = KnownZeroOut; - RHSKnownOne = KnownOneOut; - break; - } - case Instruction::Select: - if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - - // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(I, 1, DemandedMask) || - ShrinkDemandedConstant(I, 2, DemandedMask)) - return I; - - // Only known if known in both the LHS and RHS. - RHSKnownOne &= LHSKnownOne; - RHSKnownZero &= LHSKnownZero; - break; - case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask.zext(truncBf); - RHSKnownZero.zext(truncBf); - RHSKnownOne.zext(truncBf); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - DemandedMask.trunc(BitWidth); - RHSKnownZero.trunc(BitWidth); - RHSKnownOne.trunc(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - break; - } - case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVector()) - return false; // vector->int or fp->int? - - if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { - if (const VectorType *SrcVTy = - dyn_cast<VectorType>(I->getOperand(0)->getType())) { - if (DstVTy->getNumElements() != SrcVTy->getNumElements()) - // Don't touch a bitcast between vectors of different element counts. - return false; - } else - // Don't touch a scalar-to-vector bitcast. - return false; - } else if (isa<VectorType>(I->getOperand(0)->getType())) - // Don't touch a vector-to-scalar bitcast. - return false; - - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - break; - case Instruction::ZExt: { - // Compute the bits in the result that are not present in the input. 
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - DemandedMask.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - DemandedMask.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - // The top bits are known to be zero. - RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - break; - } - case Instruction::SExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - APInt InputDemandedBits = DemandedMask & - APInt::getLowBitsSet(BitWidth, SrcBitWidth); - - APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. - if ((NewBits & DemandedMask) != 0) - InputDemandedBits.set(SrcBitWidth-1); - - InputDemandedBits.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - InputDemandedBits.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. - - // If the input sign bit is known zero, or if the NewBits are not demanded - // convert this into a zero extension. - if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { - // Convert to ZExt cast - CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); - return InsertNewInstBefore(NewCast, *I); - } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set - RHSKnownOne |= NewBits; - } - break; - } - case Instruction::Add: { - // Figure out what the input bits are. If the top bits of the and result - // are not demanded, then the add doesn't demand them from its input - // either. - unsigned NLZ = DemandedMask.countLeadingZeros(); - - // If there is a constant on the RHS, there are a variety of xformations - // we can do. - if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { - // If null, this should be simplified elsewhere. Some of the xforms here - // won't work if the RHS is zero. - if (RHS->isZero()) - break; - - // If the top bit of the output is demanded, demand everything from the - // input. Otherwise, we demand all the input bits except NLZ top bits. - APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); - - // Find information about known zero/one bits in the input. - if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - - // If the RHS of the add has bits set that can't affect the input, reduce - // the constant. - if (ShrinkDemandedConstant(I, 1, InDemandedBits)) - return I; - - // Avoid excess work. - if (LHSKnownZero == 0 && LHSKnownOne == 0) - break; - - // Turn it into OR if input bits are zero. 
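// Illustrative sketch, not part of this patch: the add-to-or fold the comment
// above refers to, in plain integers. When every set bit of the constant
// lands in a position the other operand keeps at zero, no carry can be
// generated, so the addition behaves exactly like an or.
#include <cassert>
#include <cstdint>

static void addBecomesOrExample(uint32_t x) {
  // (x & 0xF0) has its low four bits known zero and 0x0F sets only those
  // bits, so the two operands cannot interact through carries.
  assert(((x & 0xF0u) + 0x0Fu) == ((x & 0xF0u) | 0x0Fu));
}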
- if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { - Instruction *Or = - BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), - I->getName()); - return InsertNewInstBefore(Or, *I); - } - - // We can say something about the output known-zero and known-one bits, - // depending on potential carries from the input constant and the - // unknowns. For example if the LHS is known to have at most the 0x0F0F0 - // bits set and the RHS constant is 0x01001, then we know we have a known - // one mask of 0x00001 and a known zero mask of 0xE0F0E. - - // To compute this, we first compute the potential carry bits. These are - // the bits which may be modified. I'm not aware of a better way to do - // this scan. - const APInt &RHSVal = RHS->getValue(); - APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); - - // Now that we know which bits have carries, compute the known-1/0 sets. - - // Bits are known one if they are known zero in one operand and one in the - // other, and there is no input carry. - RHSKnownOne = ((LHSKnownZero & RHSVal) | - (LHSKnownOne & ~RHSVal)) & ~CarryBits; - - // Bits are known zero if they are known zero in both operands and there - // is no input carry. - RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; - } else { - // If the high-bits of this ADD are not demanded, then it does not demand - // the high bits of its LHS or RHS. - if (DemandedMask[BitWidth-1] == 0) { - // Right fill the mask of bits for this ADD to demand the most - // significant bit and all those below it. - APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - } - } - break; - } - case Instruction::Sub: - // If the high-bits of this SUB are not demanded, then it does not demand - // the high bits of its LHS or RHS. - if (DemandedMask[BitWidth-1] == 0) { - // Right fill the mask of bits for this SUB to demand the most - // significant bit and all those below it. - uint32_t NLZ = DemandedMask.countLeadingZeros(); - APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - } - // Otherwise just hand the sub off to ComputeMaskedBits to fill in - // the known zeros and ones. - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero <<= ShiftAmt; - RHSKnownOne <<= ShiftAmt; - // low bits known zero. - if (ShiftAmt) - RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Unsigned shift right. 
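// Illustrative sketch, not part of this patch: how a demanded-bits mask moves
// through constant shifts, matching the Shl case above and the LShr case
// below. The mask is shifted the opposite way, and the vacated bits of the
// result become known zero. Helper names are hypothetical.
#include <cassert>
#include <cstdint>

static uint32_t demandedThroughShl(uint32_t Demanded, unsigned Amt) {
  return Demanded >> Amt;   // bits demanded of the shift's input operand
}
static uint32_t demandedThroughLShr(uint32_t Demanded, unsigned Amt) {
  return Demanded << Amt;   // bits demanded of the shift's input operand
}

static void shiftDemandExample(uint32_t x) {
  // If only the low byte of (x << 4) is demanded, only bits 0..3 of x matter.
  assert((((x & demandedThroughShl(0xFFu, 4)) << 4) & 0xFFu) ==
         ((x << 4) & 0xFFu));
  // If only the low byte of (x >> 4) is demanded, only bits 4..11 of x matter.
  assert((((x & demandedThroughLShr(0xFFu, 4)) >> 4) & 0xFFu) ==
         ((x >> 4) & 0xFFu));
}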
- APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); - if (ShiftAmt) { - // Compute the new bits that are at the top now. - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero |= HighBits; // high bits known zero. - } - } - break; - case Instruction::AShr: - // If this is an arithmetic shift right and only the low-bit is set, we can - // always convert this into a logical shr, even if the shift amount is - // variable. The low bit of the shift cannot be an input sign bit unless - // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) { - // Perform the logical shift right. - Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), I->getOperand(1), I->getName()); - return InsertNewInstBefore(NewVal, *I); - } - - // If the sign bit is the only bit demanded by this ashr, then there is no - // need to do it, the shift doesn't change the high bit. - if (DemandedMask.isSignBit()) - return I->getOperand(0); - - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Signed shift right. - APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as - // demanded. - if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.set(BitWidth-1); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) - return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - // Compute the new bits that are at the top now. - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); - - // Handle the sign bits. - APInt SignBit(APInt::getSignBit(BitWidth)); - // Adjust to where it is now in the mask. - SignBit = APIntOps::lshr(SignBit, ShiftAmt); - - // If the input sign bit is known to be zero, or if none of the top bits - // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), SA, I->getName()); - return InsertNewInstBefore(NewVal, *I); - } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one. 
- RHSKnownOne |= HighBits; - } - } - break; - case Instruction::SRem: - if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { - APInt RA = Rem->getValue().abs(); - if (RA.isPowerOf2()) { - if (DemandedMask.ult(RA)) // srem won't affect demanded bits - return I->getOperand(0); - - APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, - LHSKnownZero, LHSKnownOne, Depth+1)) - return I; - - if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) - LHSKnownZero |= ~LowBits; - - KnownZero |= LHSKnownZero & DemandedMask; - - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - } - } - break; - case Instruction::URem: { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, - KnownZero2, KnownOne2, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), AllOnes, - KnownZero2, KnownOne2, Depth+1)) - return I; - - unsigned Leaders = KnownZero2.countLeadingOnes(); - Leaders = std::max(Leaders, - KnownZero2.countLeadingOnes()); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; - break; - } - case Instruction::Call: - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap: { - // If the only bits demanded come from one byte of the bswap result, - // just shift the input byte into position to eliminate the bswap. - unsigned NLZ = DemandedMask.countLeadingZeros(); - unsigned NTZ = DemandedMask.countTrailingZeros(); - - // Round NTZ down to the next byte. If we have 11 trailing zeros, then - // we need all the bits down to bit 8. Likewise, round NLZ. If we - // have 14 leading zeros, round to 8. - NLZ &= ~7; - NTZ &= ~7; - // If we need exactly one byte, we can do this transformation. - if (BitWidth-NLZ-NTZ == 8) { - unsigned ResultBit = NTZ; - unsigned InputBit = BitWidth-NTZ-8; - - // Replace this with either a left or right shift to get the byte into - // the right place. - Instruction *NewVal; - if (InputBit > ResultBit) - NewVal = BinaryOperator::CreateLShr(I->getOperand(1), - ConstantInt::get(I->getType(), InputBit-ResultBit)); - else - NewVal = BinaryOperator::CreateShl(I->getOperand(1), - ConstantInt::get(I->getType(), ResultBit-InputBit)); - NewVal->takeName(I); - return InsertNewInstBefore(NewVal, *I); - } - - // TODO: Could compute known zero/one bits based on the input. - break; - } - } - } - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); - break; - } - - // If the client is only demanding bits that we know, return the known - // constant. - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne); - return false; -} - - -/// SimplifyDemandedVectorElts - The specified value produces a vector with -/// any number of elements. DemandedElts contains the set of elements that are -/// actually used by the caller. This method analyzes which elements of the -/// operand are undef and returns that information in UndefElts. -/// -/// If the information about demanded elements can be used to simplify the -/// operation, the operation is simplified, then the resultant value is -/// returned. This returns null if no change was made. 
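// Illustrative sketch, not part of this patch: a scalar model of the
// demanded-elements analysis defined below, with hypothetical names and a
// lane mask narrow enough to fit in 32 bits. Each bit of DemandedElts marks
// a lane the caller actually reads; an insertelement into a lane outside
// that mask is dead, and the source vector can be used directly (this is
// the IdxNo test in the InsertElement case).
#include <cassert>
#include <cstdint>

static bool insertIntoDeadLaneIsDroppable(uint32_t DemandedElts,
                                          unsigned IdxNo, unsigned VWidth) {
  // Model assumes VWidth <= 32 so the shift below is always defined.
  return IdxNo >= VWidth || (DemandedElts & (1u << IdxNo)) == 0;
}

static void demandedEltsExample() {
  // Lanes 0 and 1 of a <4 x ...> vector are demanded: inserting into lane 3
  // is droppable, inserting into lane 1 is not.
  assert(insertIntoDeadLaneIsDroppable(0x3u, 3, 4));
  assert(!insertIntoDeadLaneIsDroppable(0x3u, 1, 4));
}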
-Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
-                                                APInt& UndefElts,
-                                                unsigned Depth) {
-  unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
-  APInt EltMask(APInt::getAllOnesValue(VWidth));
-  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
-
-  if (isa<UndefValue>(V)) {
-    // If the entire vector is undefined, just return this info.
-    UndefElts = EltMask;
-    return 0;
-  } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.
-    UndefElts = EltMask;
-    return UndefValue::get(V->getType());
-  }
-
-  UndefElts = 0;
-  if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
-    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
-    Constant *Undef = UndefValue::get(EltTy);
-
-    std::vector<Constant*> Elts;
-    for (unsigned i = 0; i != VWidth; ++i)
-      if (!DemandedElts[i]) {   // If not demanded, set to undef.
-        Elts.push_back(Undef);
-        UndefElts.set(i);
-      } else if (isa<UndefValue>(CP->getOperand(i))) {   // Already undef.
-        Elts.push_back(Undef);
-        UndefElts.set(i);
-      } else {                // Otherwise, defined.
-        Elts.push_back(CP->getOperand(i));
-      }
-
-    // If we changed the constant, return it.
-    Constant *NewCP = ConstantVector::get(Elts);
-    return NewCP != CP ? NewCP : 0;
-  } else if (isa<ConstantAggregateZero>(V)) {
-    // Simplify the CAZ to a ConstantVector where the non-demanded elements are
-    // set to undef.
-
-    // Check if this is identity. If so, return 0 since we are not simplifying
-    // anything.
-    if (DemandedElts == ((1ULL << VWidth) -1))
-      return 0;
-
-    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
-    Constant *Zero = Constant::getNullValue(EltTy);
-    Constant *Undef = UndefValue::get(EltTy);
-    std::vector<Constant*> Elts;
-    for (unsigned i = 0; i != VWidth; ++i) {
-      Constant *Elt = DemandedElts[i] ? Zero : Undef;
-      Elts.push_back(Elt);
-    }
-    UndefElts = DemandedElts ^ EltMask;
-    return ConstantVector::get(Elts);
-  }
-
-  // Limit search depth.
-  if (Depth == 10)
-    return 0;
-
-  // If multiple users are using the root value, proceed with
-  // simplification conservatively assuming that all elements
-  // are needed.
-  if (!V->hasOneUse()) {
-    // Quit if we find multiple users of a non-root value though.
-    // They'll be handled when it's their turn to be visited by
-    // the main instcombine process.
-    if (Depth != 0)
-      // TODO: Just compute the UndefElts information recursively.
-      return 0;
-
-    // Conservatively assume that all elements are needed.
-    DemandedElts = EltMask;
-  }
-
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return 0;        // Only analyze instructions.
-
-  bool MadeChange = false;
-  APInt UndefElts2(VWidth, 0);
-  Value *TmpV;
-  switch (I->getOpcode()) {
-  default: break;
-
-  case Instruction::InsertElement: {
-    // If this is a variable index, we don't know which element it overwrites.
-    // Demand exactly the same input as we produce.
-    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
-    if (Idx == 0) {
-      // Note that we can't propagate undef elt info, because we don't know
-      // which elt is getting updated.
-      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
-                                        UndefElts2, Depth+1);
-      if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
-      break;
-    }
-
-    // If this is inserting an element that isn't demanded, remove this
-    // insertelement.
- unsigned IdxNo = Idx->getZExtValue(); - if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { - Worklist.Add(I); - return I->getOperand(0); - } - - // Otherwise, the element inserted overwrites whatever was there, so the - // input demanded set is simpler than the output set. - APInt DemandedElts2 = DemandedElts; - DemandedElts2.clear(IdxNo); - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, - UndefElts, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - - // The inserted element is defined. - UndefElts.clear(IdxNo); - break; - } - case Instruction::ShuffleVector: { - ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); - uint64_t LHSVWidth = - cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements(); - APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0); - for (unsigned i = 0; i < VWidth; i++) { - if (DemandedElts[i]) { - unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal != -1u) { - assert(MaskVal < LHSVWidth * 2 && - "shufflevector mask index out of range!"); - if (MaskVal < LHSVWidth) - LeftDemanded.set(MaskVal); - else - RightDemanded.set(MaskVal - LHSVWidth); - } - } - } - - APInt UndefElts4(LHSVWidth, 0); - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, - UndefElts4, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - - APInt UndefElts3(LHSVWidth, 0); - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, - UndefElts3, Depth+1); - if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } - - bool NewUndefElts = false; - for (unsigned i = 0; i < VWidth; i++) { - unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal == -1u) { - UndefElts.set(i); - } else if (MaskVal < LHSVWidth) { - if (UndefElts4[MaskVal]) { - NewUndefElts = true; - UndefElts.set(i); - } - } else { - if (UndefElts3[MaskVal - LHSVWidth]) { - NewUndefElts = true; - UndefElts.set(i); - } - } - } - - if (NewUndefElts) { - // Add additional discovered undefs. - std::vector<Constant*> Elts; - for (unsigned i = 0; i < VWidth; ++i) { - if (UndefElts[i]) - Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); - else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), - Shuffle->getMaskValue(i))); - } - I->setOperand(2, ConstantVector::get(Elts)); - MadeChange = true; - } - break; - } - case Instruction::BitCast: { - // Vector->vector casts only. - const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); - if (!VTy) break; - unsigned InVWidth = VTy->getNumElements(); - APInt InputDemandedElts(InVWidth, 0); - unsigned Ratio; - - if (VWidth == InVWidth) { - // If we are converting from <4 x i32> -> <4 x f32>, we demand the same - // elements as are demanded of us. - Ratio = 1; - InputDemandedElts = DemandedElts; - } else if (VWidth > InVWidth) { - // Untested so far. - break; - - // If there are more elements in the result than there are in the source, - // then an input element is live if any of the corresponding output - // elements are live. - Ratio = VWidth/InVWidth; - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { - if (DemandedElts[OutIdx]) - InputDemandedElts.set(OutIdx/Ratio); - } - } else { - // Untested so far. - break; - - // If there are more elements in the source than there are in the result, - // then an input element is live if the corresponding output element is - // live. 
- Ratio = InVWidth/VWidth; - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (DemandedElts[InIdx/Ratio]) - InputDemandedElts.set(InIdx); - } - - // div/rem demand all inputs, because they don't want divide by zero. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, - UndefElts2, Depth+1); - if (TmpV) { - I->setOperand(0, TmpV); - MadeChange = true; - } - - UndefElts = UndefElts2; - if (VWidth > InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the result than there are in the source, - // then an output element is undef if the corresponding input element is - // undef. - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) - if (UndefElts2[OutIdx/Ratio]) - UndefElts.set(OutIdx); - } else if (VWidth < InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the source than there are in the result, - // then a result element is undef if all of the corresponding input - // elements are undef. - UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clear(InIdx/Ratio); // Clear undef bit. - } - break; - } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // div/rem demand all inputs, because they don't want divide by zero. - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts, Depth+1); - if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } - - // Output elements are undefined if both are undefined. Consider things - // like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - break; - - case Instruction::Call: { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); - if (!II) break; - switch (II->getIntrinsicID()) { - default: break; - - // Binary vector operations that work column-wise. A dest element is a - // function of the corresponding input elements from the two inputs. - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse_min_ss: - case Intrinsic::x86_sse_max_ss: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - case Intrinsic::x86_sse2_min_sd: - case Intrinsic::x86_sse2_max_sd: - TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, - UndefElts, Depth+1); - if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; } - TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts, - UndefElts2, Depth+1); - if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; } - - // If only the low elt is demanded and this is a scalarizable intrinsic, - // scalarize it now. - if (DemandedElts == 1) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse2_sub_sd: - case Intrinsic::x86_sse2_mul_sd: - // TODO: Lower MIN/MAX/ABS/etc - Value *LHS = II->getOperand(1); - Value *RHS = II->getOperand(2); - // Extract the element as scalars. 
- LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, - ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); - RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, - ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); - - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Case stmts out of sync!"); - case Intrinsic::x86_sse_sub_ss: - case Intrinsic::x86_sse2_sub_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, - II->getName()), *II); - break; - case Intrinsic::x86_sse_mul_ss: - case Intrinsic::x86_sse2_mul_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS, - II->getName()), *II); - break; - } - - Instruction *New = - InsertElementInst::Create( - UndefValue::get(II->getType()), TmpV, - ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName()); - InsertNewInstBefore(New, *II); - return New; - } - } - - // Output elements are undefined if both are undefined. Consider things - // like undef&0. The result is known zero, not undef. - UndefElts &= UndefElts2; - break; - } - break; - } - } - return MadeChange ? I : 0; -} - - -/// AssociativeOpt - Perform an optimization on an associative operator. This -/// function is designed to check a chain of associative operators for a -/// potential to apply a certain optimization. Since the optimization may be -/// applicable if the expression was reassociated, this checks the chain, then -/// reassociates the expression as necessary to expose the optimization -/// opportunity. This makes use of a special Functor, which must define -/// 'shouldApply' and 'apply' methods. -/// -template<typename Functor> -static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { - unsigned Opcode = Root.getOpcode(); - Value *LHS = Root.getOperand(0); - - // Quick check, see if the immediate LHS matches... - if (F.shouldApply(LHS)) - return F.apply(Root); - - // Otherwise, if the LHS is not of the same opcode as the root, return. - Instruction *LHSI = dyn_cast<Instruction>(LHS); - while (LHSI && LHSI->getOpcode() == Opcode && LHSI->hasOneUse()) { - // Should we apply this transform to the RHS? - bool ShouldApply = F.shouldApply(LHSI->getOperand(1)); - - // If not to the RHS, check to see if we should apply to the LHS... - if (!ShouldApply && F.shouldApply(LHSI->getOperand(0))) { - cast<BinaryOperator>(LHSI)->swapOperands(); // Make the LHS the RHS - ShouldApply = true; - } - - // If the functor wants to apply the optimization to the RHS of LHSI, - // reassociate the expression from ((? op A) op B) to (? op (A op B)) - if (ShouldApply) { - // Now all of the instructions are in the current basic block, go ahead - // and perform the reassociation. - Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0)); - - // First move the selected RHS to the LHS of the root... - Root.setOperand(0, LHSI->getOperand(1)); - - // Make what used to be the LHS of the root be the user of the root... - Value *ExtraOperand = TmpLHSI->getOperand(1); - if (&Root == TmpLHSI) { - Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType())); - return 0; - } - Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI - TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root - BasicBlock::iterator ARI = &Root; ++ARI; - TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root - ARI = Root; - - // Now propagate the ExtraOperand down the chain of instructions until we - // get to LHSI. 
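// Illustrative sketch, not part of this patch: the shape of the rewrite the
// loop below completes. For an associative opcode, ((Q op A) op B) is
// reassociated into (Q op (A op B)), which preserves the value while
// changing which pair of operands is adjacent for the functor to inspect.
#include <cassert>
#include <cstdint>

static void reassociateExample(uint32_t q, uint32_t a, uint32_t b) {
  // Unsigned addition is associative, so the rotation is value-preserving.
  assert(((q + a) + b) == (q + (a + b)));
}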
-      while (TmpLHSI != LHSI) {
-        Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
-        // Move the instruction to immediately before the chain we are
-        // constructing to avoid breaking dominance properties.
-        NextLHSI->moveBefore(ARI);
-        ARI = NextLHSI;
-
-        Value *NextOp = NextLHSI->getOperand(1);
-        NextLHSI->setOperand(1, ExtraOperand);
-        TmpLHSI = NextLHSI;
-        ExtraOperand = NextOp;
-      }
-
-      // Now that the instructions are reassociated, have the functor perform
-      // the transformation...
-      return F.apply(Root);
-    }
-
-    LHSI = dyn_cast<Instruction>(LHSI->getOperand(0));
-  }
-  return 0;
-}
-
-namespace {
-
-// AddRHS - Implements: X + X --> X << 1
-struct AddRHS {
-  Value *RHS;
-  explicit AddRHS(Value *rhs) : RHS(rhs) {}
-  bool shouldApply(Value *LHS) const { return LHS == RHS; }
-  Instruction *apply(BinaryOperator &Add) const {
-    return BinaryOperator::CreateShl(Add.getOperand(0),
-                                     ConstantInt::get(Add.getType(), 1));
-  }
-};
-
-// AddMaskingAnd - Implements (A & C1)+(B & C2) --> (A & C1)|(B & C2)
-//                 iff C1&C2 == 0
-struct AddMaskingAnd {
-  Constant *C2;
-  explicit AddMaskingAnd(Constant *c) : C2(c) {}
-  bool shouldApply(Value *LHS) const {
-    ConstantInt *C1;
-    return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) &&
-           ConstantExpr::getAnd(C1, C2)->isNullValue();
-  }
-  Instruction *apply(BinaryOperator &Add) const {
-    return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
-  }
-};
-
-}
-
-static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
-                                             InstCombiner *IC) {
-  if (CastInst *CI = dyn_cast<CastInst>(&I))
-    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
-
-  // Figure out if the constant is the left or the right argument.
-  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
-  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
-
-  if (Constant *SOC = dyn_cast<Constant>(SO)) {
-    if (ConstIsRHS)
-      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
-    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
-  }
-
-  Value *Op0 = SO, *Op1 = ConstOperand;
-  if (!ConstIsRHS)
-    std::swap(Op0, Op1);
-
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
-    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
-                                    SO->getName()+".op");
-  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
-    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
-                                   SO->getName()+".cmp");
-  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
-    return IC->Builder->CreateFCmp(CI->getPredicate(), Op0, Op1,
-                                   SO->getName()+".cmp");
-  llvm_unreachable("Unknown binary instruction type!");
-}
-
-// FoldOpIntoSelect - Given an instruction with a select as one operand and a
-// constant as the other operand, try to fold the binary operator into the
-// select arguments. This also works for Cast instructions, which obviously do
-// not have a second operand.
-static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
-                                     InstCombiner *IC) {
-  // Don't modify shared select instructions.
-  if (!SI->hasOneUse()) return 0;
-  Value *TV = SI->getOperand(1);
-  Value *FV = SI->getOperand(2);
-
-  if (isa<Constant>(TV) || isa<Constant>(FV)) {
-    // Bool selects with constant operands can be folded to logical ops.
-    if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0;
-
-    Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
-    Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);
-
-    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
-                              SelectFalseVal);
-  }
-  return 0;
-}
-
-
-/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
-/// has a PHI node as operand #0, see if we can fold the instruction into the
-/// PHI (which is only possible if all operands to the PHI are constants).
-///
-/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
-/// that would normally be unprofitable because they strongly encourage jump
-/// threading.
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
-                                         bool AllowAggressive) {
-  AllowAggressive = false;
-  PHINode *PN = cast<PHINode>(I.getOperand(0));
-  unsigned NumPHIValues = PN->getNumIncomingValues();
-  if (NumPHIValues == 0 ||
-      // We normally only transform phis with a single use, unless we're trying
-      // hard to make jump threading happen.
-      (!PN->hasOneUse() && !AllowAggressive))
-    return 0;
-
-
-  // Check to see if all of the operands of the PHI are simple constants
-  // (constantint/constantfp/undef). If there is one non-constant value,
-  // remember the BB it is in. If there is more than one or if *it* is a PHI,
-  // bail out. We don't do arbitrary constant expressions here because moving
-  // their computation can be expensive without a cost model.
-  BasicBlock *NonConstBB = 0;
-  for (unsigned i = 0; i != NumPHIValues; ++i)
-    if (!isa<Constant>(PN->getIncomingValue(i)) ||
-        isa<ConstantExpr>(PN->getIncomingValue(i))) {
-      if (NonConstBB) return 0;  // More than one non-const value.
-      if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.
-      NonConstBB = PN->getIncomingBlock(i);
-
-      // If the incoming non-constant value is in I's block, we have an infinite
-      // loop.
-      if (NonConstBB == I.getParent())
-        return 0;
-    }
-
-  // If there is exactly one non-constant value, we can insert a copy of the
-  // operation in that block. However, if this is a critical edge, we would be
-  // inserting the computation on some other paths (e.g. inside a loop). Only
-  // do this if the pred block is unconditionally branching into the phi block.
-  if (NonConstBB != 0 && !AllowAggressive) {
-    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
-    if (!BI || !BI->isUnconditional()) return 0;
-  }
-
-  // Okay, we can do the transformation: create the new PHI node.
-  PHINode *NewPN = PHINode::Create(I.getType(), "");
-  NewPN->reserveOperandSpace(PN->getNumOperands()/2);
-  InsertNewInstBefore(NewPN, *PN);
-  NewPN->takeName(PN);
-
-  // Next, add all of the operands to the PHI.
-  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
-    // We only currently try to fold the condition of a select when it is a phi,
-    // not the true/false values.
-    Value *TrueV = SI->getTrueValue();
-    Value *FalseV = SI->getFalseValue();
-    BasicBlock *PhiTransBB = PN->getParent();
-    for (unsigned i = 0; i != NumPHIValues; ++i) {
-      BasicBlock *ThisBB = PN->getIncomingBlock(i);
-      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
-      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
-      Value *InV = 0;
-      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
-        InV = InC->isNullValue() ?
FalseVInPred : TrueVInPred; - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, - FalseVInPred, - "phitmp", NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } - NewPN->addIncoming(InV, ThisBB); - } - } else if (I.getNumOperands() == 2) { - Constant *C = cast<Constant>(I.getOperand(1)); - for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = 0; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { - if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); - else - InV = ConstantExpr::get(I.getOpcode(), InC, C); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) - InV = BinaryOperator::Create(BO->getOpcode(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) - InV = CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), - PN->getIncomingValue(i), C, "phitmp", - NonConstBB->getTerminator()); - else - llvm_unreachable("Unknown binop!"); - - Worklist.Add(cast<Instruction>(InV)); - } - NewPN->addIncoming(InV, PN->getIncomingBlock(i)); - } - } else { - CastInst *CI = cast<CastInst>(&I); - const Type *RetTy = CI->getType(); - for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV; - if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { - InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - } else { - assert(PN->getIncomingBlock(i) == NonConstBB); - InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), - I.getType(), "phitmp", - NonConstBB->getTerminator()); - Worklist.Add(cast<Instruction>(InV)); - } - NewPN->addIncoming(InV, PN->getIncomingBlock(i)); - } - } - return ReplaceInstUsesWith(I, NewPN); -} - - -/// WillNotOverflowSignedAdd - Return true if we can prove that: -/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) -/// This basically requires proving that the add in the original type would not -/// overflow to change the sign bit or have a carry out. -bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { - // There are different heuristics we can use for this. Here are some simple - // ones. - - // Add has the property that adding any two 2's complement numbers can only - // have one carry bit which can change a sign. As such, if LHS and RHS each - // have at least two sign bits, we know that the addition of the two values - // will sign extend fine. - if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) - return true; - - - // If one of the operands only has one non-zero bit, and if the other operand - // has a known-zero bit in a more significant place than it (not including the - // sign bit) the ripple may go up to and fill the zero, but won't change the - // sign. For example, (X & ~4) + 1. - - // TODO: Implement. 
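// Illustrative sketch, not part of this patch: why the sign-bit heuristic
// above is sound, shown for 8-bit values. Having at least two sign bits
// means the value lies in [-64, 63]; the sum of two such values lies in
// [-128, 126], which still fits in 8 bits, so the add cannot overflow and
// flip the sign. Helper names are hypothetical.
#include <cassert>
#include <cstdint>

static bool hasTwoSignBits8(int8_t v) {
  return v >= -64 && v <= 63;   // the top two bits agree
}

static void signedAddNoOverflowExample(int8_t a, int8_t b) {
  if (hasTwoSignBits8(a) && hasTwoSignBits8(b)) {
    int Sum = int(a) + int(b);           // exact sum in a wider type
    assert(Sum >= -128 && Sum <= 127);   // still representable as int8_t
  }
}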
- - return false; -} - - -Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), TD)) - return ReplaceInstUsesWith(I, V); - - - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) { - // X + (signbit) --> X ^ signbit - const APInt& Val = CI->getValue(); - uint32_t BitWidth = Val.getBitWidth(); - if (Val == APInt::getSignBit(BitWidth)) - return BinaryOperator::CreateXor(LHS, RHS); - - // See if SimplifyDemandedBits can simplify this. This handles stuff like - // (X & 254)+1 -> (X&254)|1 - if (SimplifyDemandedInstructionBits(I)) - return &I; - - // zext(bool) + C -> bool ? C + 1 : C - if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) - if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) - return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - } - - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - - ConstantInt *XorRHS = 0; - Value *XorLHS = 0; - if (isa<ConstantInt>(RHSC) && - match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { - uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); - const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); - - uint32_t Size = TySizeBits / 2; - APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); - APInt CFF80Val(-C0080Val); - do { - if (TySizeBits > Size) { - // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. - // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. - if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || - (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { - // This is a sign extend if the top bits are known zero. - if (!MaskedValueIsZero(XorLHS, - APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) - Size = 0; // Not a sign ext, but can't be any others either. - break; - } - } - Size >>= 1; - C0080Val = APIntOps::lshr(C0080Val, Size); - CFF80Val = APIntOps::ashr(CFF80Val, Size); - } while (Size >= 1); - - // FIXME: This shouldn't be necessary. When the backends can handle types - // with funny bit widths then this switch statement should be removed. It - // is just here to get the size of the "middle" type back up to something - // that the back ends can handle. 
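// Illustrative sketch, not part of this patch: checking the idiom the code
// above recognizes, for the 8-bit-value-in-16-bit-register case. Xoring the
// low byte with 0x80 and then adding 0xFF80 is exactly sign extension from
// 8 bits, which is why the xor/add pair is rewritten as trunc-then-sext.
#include <cassert>
#include <cstdint>

static void xorAddIsSextExample(uint16_t x) {
  uint16_t ViaXorAdd = uint16_t(((x & 0xFFu) ^ 0x80u) + 0xFF80u);
  uint16_t ViaSext   = uint16_t(int16_t(int8_t(uint8_t(x & 0xFFu))));
  assert(ViaXorAdd == ViaSext);
}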
- const Type *MiddleType = 0; - switch (Size) { - default: break; - case 32: MiddleType = Type::getInt32Ty(*Context); break; - case 16: MiddleType = Type::getInt16Ty(*Context); break; - case 8: MiddleType = Type::getInt8Ty(*Context); break; - } - if (MiddleType) { - Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); - return new SExtInst(NewTrunc, I.getType(), I.getName()); - } - } - } - - if (I.getType() == Type::getInt1Ty(*Context)) - return BinaryOperator::CreateXor(LHS, RHS); - - // X + X --> X << 1 - if (I.getType()->isInteger()) { - if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) - return Result; - - if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { - if (RHSI->getOpcode() == Instruction::Sub) - if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B - return ReplaceInstUsesWith(I, RHSI->getOperand(0)); - } - if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) { - if (LHSI->getOpcode() == Instruction::Sub) - if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B - return ReplaceInstUsesWith(I, LHSI->getOperand(0)); - } - } - - // -A + B --> B - A - // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVector()) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); - return BinaryOperator::CreateNeg(NewAdd); - } - } - - return BinaryOperator::CreateSub(RHS, LHSV); - } - - // A + -B --> A - B - if (!isa<Constant>(RHS)) - if (Value *V = dyn_castNegVal(RHS)) - return BinaryOperator::CreateSub(LHS, V); - - - ConstantInt *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); - - // X*C1 + X*C2 --> X * (C1+C2) - ConstantInt *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } - - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); - - // X + ~X --> -1 since ~X = -X-1 - if (dyn_castNotVal(LHS) == RHS || - dyn_castNotVal(RHS) == LHS) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - - // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 - if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) - if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) - return R; - - // A+B --> A|B iff A and B have no bits set in common. - if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { - APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); - APInt LHSKnownOne(IT->getBitWidth(), 0); - APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); - if (LHSKnownZero != 0) { - APInt RHSKnownOne(IT->getBitWidth(), 0); - APInt RHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); - - // No bits in common -> bitwise or. 
- if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) - return BinaryOperator::CreateOr(LHS, RHS); - } - } - - // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVector()) { - Value *W, *X, *Y, *Z; - if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && - match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { - if (W != Y) { - if (W == Z) { - std::swap(Y, Z); - } else if (Y == X) { - std::swap(W, X); - } else if (X == Z) { - std::swap(Y, Z); - std::swap(W, X); - } - } - - if (W == Y) { - Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); - return BinaryOperator::CreateMul(W, NewAdd); - } - } - } - - if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { - Value *X = 0; - if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X - return BinaryOperator::CreateSub(SubOne(CRHS), X); - - // (X & FF00) + xx00 -> (X+xx00) & FF00 - if (LHS->hasOneUse() && - match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = ConstantExpr::getAnd(CRHS, C2); - if (Anded == CRHS) { - // See if all bits from the first bit set in the Add RHS up are included - // in the mask. First, get the rightmost bit. - const APInt& AddRHSV = CRHS->getValue(); - - // Form a mask of all bits from the lowest bit added through the top. - APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); - - // See if the and mask includes all of these bits. - APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); - - if (AddRHSHighBits == AddRHSHighBitsAnd) { - // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); - return BinaryOperator::CreateAnd(NewAdd, C2); - } - } - } - - // Try to fold constant add into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - } - - // add (select X 0 (sub n A)) A --> select X A n - { - SelectInst *SI = dyn_cast<SelectInst>(LHS); - Value *A = RHS; - if (!SI) { - SI = dyn_cast<SelectInst>(RHS); - A = LHS; - } - if (SI && SI->hasOneUse()) { - Value *TV = SI->getTrueValue(); - Value *FV = SI->getFalseValue(); - Value *N; - - // Can we fold the add into the argument of the select? - // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && - match(TV, m_Sub(m_Value(N), m_Specific(A)))) - // Fold the add into the true select value. - return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && - match(FV, m_Sub(m_Value(N), m_Specific(A)))) - // Fold the add into the false select value. - return SelectInst::Create(SI->getCondition(), A, N); - } - } - - // Check for (add (sext x), y), see if we can merge this into an - // integer add followed by a sext. - if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { - // (add (sext x), cst) --> (sext (add x, cst')) - if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { - Constant *CI = - ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSExt(CI, I.getType()) == RHSC && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { - // Insert the new, smaller add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), - CI, "addconv"); - return new SExtInst(NewAdd, I.getType()); - } - } - - // (add (sext x), (sext y)) --> (sext (add int x, y)) - if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { - // Only do this if x/y have the same type, if at last one of them has a - // single use (so we don't increase the number of sexts), and if the - // integer add will not overflow. 
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0))) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), "addconv");
- return new SExtInst(NewAdd, I.getType());
- }
- }
- }
-
- return Changed ? &I : 0;
-}
-
-Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
-
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // X + 0 --> X
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero
- (I.getType())->getValueAPF()))
- return ReplaceInstUsesWith(I, LHS);
- }
-
- if (isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- // -A + B --> B - A
- // -A + -B --> -(A + B)
- if (Value *LHSV = dyn_castFNegVal(LHS))
- return BinaryOperator::CreateFSub(RHS, LHSV);
-
- // A + -B --> A - B
- if (!isa<Constant>(RHS))
- if (Value *V = dyn_castFNegVal(RHS))
- return BinaryOperator::CreateFSub(LHS, V);
-
- // Check for X+0.0. Simplify it to X if we know X is not -0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
- if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
- return ReplaceInstUsesWith(I, LHS);
-
- // Check for (add double (sitofp x), y), see if we can merge this into an
- // integer add followed by a promotion.
- if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
- // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
- // ... if the constant fits in the integer value. This is useful for things
- // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
- // requires a constant pool load, and generally allows the add to be better
- // instcombined.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
- Constant *CI =
- ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
- if (LHSConv->hasOneUse() &&
- ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- CI, "addconv");
- return new SIToFPInst(NewAdd, I.getType());
- }
- }
-
- // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
- if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of int->fp conversions),
- // and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0))) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0),"addconv");
- return new SIToFPInst(NewAdd, I.getType());
- }
- }
- }
-
- return Changed ? &I : 0;
-}
-
-
-/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
-/// code necessary to compute the offset from the base pointer (without adding
-/// in the base pointer). Return the result as a signed integer of intptr size.
-static Value *EmitGEPOffset(User *GEP, InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); - Value *Result = Constant::getNullValue(IntPtrTy); - - // Build a mask for high order bits. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; - ++i, ++GTI) { - Value *Op = *i; - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; - if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) { - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { - Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - - Result = IC.Builder->CreateAdd(Result, - ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".offs"); - continue; - } - - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - Constant *OC = - ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale); - // Emit an add instruction. - Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); - continue; - } - // Convert to correct type. - if (Op->getType() != IntPtrTy) - Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); - if (Size != 1) { - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - // We'll let instcombine(mul) convert this to a shl if possible. - Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); - } - - // Emit an add instruction. - Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); - } - return Result; -} - - -/// EvaluateGEPOffsetExpression - Return a value that can be used to compare -/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we -/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can -/// be complex, and scales are involved. The above expression would also be -/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). -/// This later form is less amenable to optimization though, and we are allowed -/// to generate the first by knowing that pointer arithmetic doesn't overflow. -/// -/// If we can't emit an optimized form for this expression, this returns null. -/// -static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, - InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - - // Check to see if this gep only has a single variable index. If so, and if - // any constant indices are a multiple of its scale, then we can compute this - // in terms of the scale of the variable index. For example, if the GEP - // implies an offset of "12 + i*4", then we can codegen this as "3 + i", - // because the expression will cross zero at the same point. - unsigned i, e = GEP->getNumOperands(); - int64_t Offset = 0; - for (i = 1; i != e; ++i, ++GTI) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { - // Compute the aggregate offset of constant indices. - if (CI->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. 
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*CI->getSExtValue(); - } - } else { - // Found our variable index. - break; - } - } - - // If there are no variable indices, we must have a constant offset, just - // evaluate it the general way. - if (i == e) return 0; - - Value *VariableIdx = GEP->getOperand(i); - // Determine the scale factor of the variable element. For example, this is - // 4 if the variable index is into an array of i32. - uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType()); - - // Verify that there are no other variable indices. If so, emit the hard way. - for (++i, ++GTI; i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!CI) return 0; - - // Compute the aggregate offset of constant indices. - if (CI->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue()); - } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*CI->getSExtValue(); - } - } - - // Okay, we know we have a single variable index, which must be a - // pointer/array/vector index. If there is no offset, life is simple, return - // the index. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - if (Offset == 0) { - // Cast to intptrty in case a truncation occurs. If an extension is needed, - // we don't need to bother extending: the extension won't affect where the - // computation crosses zero. - if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, - TD.getIntPtrType(VariableIdx->getContext()), - VariableIdx->getName(), &I); - return VariableIdx; - } - - // Otherwise, there is an index. The computation we will do will be modulo - // the pointer size, so get it. - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - Offset &= PtrSizeMask; - VariableScale &= PtrSizeMask; - - // To do this transformation, any constant index must be a multiple of the - // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i", - // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a - // multiple of the variable scale. - int64_t NewOffs = Offset / (int64_t)VariableScale; - if (Offset != NewOffs*(int64_t)VariableScale) - return 0; - - // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); - if (VariableIdx->getType() != IntPtrTy) - VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, - true /*SExt*/, - VariableIdx->getName(), &I); - Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); -} - - -/// Optimize pointer differences into the same array into a size. Consider: -/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer -/// operands to the ptrtoint instructions for the LHS/RHS of the subtract. -/// -Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, - const Type *Ty) { - assert(TD && "Must have target data info for this"); - - // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize - // this. 
- bool Swapped; - GetElementPtrInst *GEP; - - if ((GEP = dyn_cast<GetElementPtrInst>(LHS)) && - GEP->getOperand(0) == RHS) - Swapped = false; - else if ((GEP = dyn_cast<GetElementPtrInst>(RHS)) && - GEP->getOperand(0) == LHS) - Swapped = true; - else - return 0; - - // TODO: Could also optimize &A[i] - &A[j] -> "i-j". - - // Emit the offset of the GEP and an intptr_t. - Value *Result = EmitGEPOffset(GEP, *this); - - // If we have p - gep(p, ...) then we have to negate the result. - if (Swapped) - Result = Builder->CreateNeg(Result, "diff.neg"); - - return Builder->CreateIntCast(Result, Ty, true); -} - - -Instruction *InstCombiner::visitSub(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. - if (Value *V = dyn_castNegVal(Op1)) { - BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); - Res->setHasNoSignedWrap(I.hasNoSignedWrap()); - Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - return Res; - } - - if (isa<UndefValue>(Op0)) - return ReplaceInstUsesWith(I, Op0); // undef - X -> undef - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X - undef -> undef - if (I.getType() == Type::getInt1Ty(*Context)) - return BinaryOperator::CreateXor(Op0, Op1); - - if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { - // Replace (-1 - A) with (~A). - if (C->isAllOnesValue()) - return BinaryOperator::CreateNot(Op1); - - // C - ~X == X + (1+C) - Value *X = 0; - if (match(Op1, m_Not(m_Value(X)))) - return BinaryOperator::CreateAdd(X, AddOne(C)); - - // -(X >>u 31) -> (X >>s 31) - // -(X >>s 31) -> (X >>u 31) - if (C->isZero()) { - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) { - if (SI->getOpcode() == Instruction::LShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert AShr. - return BinaryOperator::Create(Instruction::AShr, - SI->getOperand(0), CU, SI->getName()); - } - } - } else if (SI->getOpcode() == Instruction::AShr) { - if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) { - // Check to see if we are shifting out everything but the sign bit. - if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) == - SI->getType()->getPrimitiveSizeInBits()-1) { - // Ok, the transformation is safe. Insert LShr. - return BinaryOperator::CreateLShr( - SI->getOperand(0), CU, SI->getName()); - } - } - } - } - } - - // Try to fold constant sub into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - // C - zext(bool) -> bool ? 
C - 1 : C - if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) - if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) - return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); - } - - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op1I->getOpcode() == Instruction::Add) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1), - I.getName()); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0), - I.getName()); - else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { - if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) - // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub( - ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); - } - } - - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::Sub) { - // Swap the two operands of the subexpr... - Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level add instruction... - return BinaryOperator::CreateAdd(Op0, Op1); - } - - // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)... - // - if (Op1I->getOpcode() == Instruction::And && - (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { - Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); - - Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); - return BinaryOperator::CreateAnd(Op0, NewNot); - } - - // 0 - (X sdiv C) -> (X sdiv -C) - if (Op1I->getOpcode() == Instruction::SDiv) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isZero()) - if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1))) - return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - ConstantExpr::getNeg(DivRHS)); - - // X - X*C --> X * (1-C) - ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = - ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), - C2); - return BinaryOperator::CreateMul(Op0, CP1); - } - } - } - - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (Op0I->getOpcode() == Instruction::Add) { - if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::Sub) { - if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1), - I.getName()); - } - } - - ConstantInt *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - - // Optimize pointer differences into the same array into a size. Consider: - // &A[10] - &A[0]: we should compile this to "10". 
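
The "&A[10] - &A[0]" comment above is easiest to see at both levels: the C expression counts elements, while the ptrtoint subtraction the optimizer actually sees counts bytes, which is exactly the GEP offset that OptimizePointerDifference reconstructs. A hedged standalone sketch (int chosen arbitrarily as the element type):

#include <cassert>
#include <cstdint>

int main() {
  int A[16];
  // Source level: pointer difference in elements.
  assert(&A[10] - &A[0] == 10);
  // IR level: the subtract of the two ptrtoint values is a byte offset,
  // i.e. the emitted GEP offset 10 * sizeof(int).
  assert(reinterpret_cast<uintptr_t>(&A[10]) -
         reinterpret_cast<uintptr_t>(&A[0]) == 10 * sizeof(int));
  return 0;
}
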
- if (TD) { - if (PtrToIntInst *LHS = dyn_cast<PtrToIntInst>(Op0)) - if (PtrToIntInst *RHS = dyn_cast<PtrToIntInst>(Op1)) - if (Value *Res = OptimizePointerDifference(LHS->getOperand(0), - RHS->getOperand(0), - I.getType())) - return ReplaceInstUsesWith(I, Res); - - // trunc(p)-trunc(q) -> trunc(p-q) - if (TruncInst *LHST = dyn_cast<TruncInst>(Op0)) - if (TruncInst *RHST = dyn_cast<TruncInst>(Op1)) - if (PtrToIntInst *LHS = dyn_cast<PtrToIntInst>(LHST->getOperand(0))) - if (PtrToIntInst *RHS = dyn_cast<PtrToIntInst>(RHST->getOperand(0))) - if (Value *Res = OptimizePointerDifference(LHS->getOperand(0), - RHS->getOperand(0), - I.getType())) - return ReplaceInstUsesWith(I, Res); - } - - return 0; -} - -Instruction *InstCombiner::visitFSub(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // If this is a 'B = x-(-A)', change to B = x+A... - if (Value *V = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFAdd(Op0, V); - - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op1I->getOpcode() == Instruction::FAdd) { - if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(1), - I.getName()); - else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(0), - I.getName()); - } - } - - return 0; -} - -/// isSignBitCheck - Given an exploded icmp instruction, return true if the -/// comparison only checks the sign bit. If it only checks the sign bit, set -/// TrueIfSigned if the result of the comparison is true when the input value is -/// signed. -static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, - bool &TrueIfSigned) { - switch (pred) { - case ICmpInst::ICMP_SLT: // True if LHS s< 0 - TrueIfSigned = true; - return RHS->isZero(); - case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 - TrueIfSigned = true; - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_SGT: // True if LHS s> -1 - TrueIfSigned = false; - return RHS->isAllOnesValue(); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = true; - return RHS->getValue().isSignBit(); - default: - return false; - } -} - -Instruction *InstCombiner::visitMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa<UndefValue>(Op1)) // undef * X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // Simplify mul instructions with a constant RHS. 
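
The constant-RHS folds that follow lean on two small identities: multiplying by a power of two is a left shift, and multiplying by -1 is a subtraction from zero. A quick standalone check in plain unsigned arithmetic (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 4096; ++x) {
    assert(x * 8u == x << 3);          // X * 2^C  -->  X << C
    assert(x * 0xFFFFFFFFu == 0u - x); // X * -1   -->  0 - X
  }
  return 0;
}
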
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) { - - // ((X << C1)*C2) == (X * (C2 << C1)) - if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) - if (SI->getOpcode() == Instruction::Shl) - if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) - return BinaryOperator::CreateMul(SI->getOperand(0), - ConstantExpr::getShl(CI, ShOp)); - - if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 - if (CI->equalsInt(1)) // X * 1 == X - return ReplaceInstUsesWith(I, Op0); - if (CI->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); - - const APInt& Val = cast<ConstantInt>(CI)->getValue(); - if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C - return BinaryOperator::CreateShl(Op0, - ConstantInt::get(Op0->getType(), Val.logBase2())); - } - } else if (isa<VectorType>(Op1C->getType())) { - if (Op1C->isNullValue()) - return ReplaceInstUsesWith(I, Op1C); - - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { - if (Op1V->isAllOnesValue()) // X * -1 == 0 - X - return BinaryOperator::CreateNeg(Op0, I.getName()); - - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) - if (CI->equalsInt(1)) - return ReplaceInstUsesWith(I, Op0); - } - } - } - - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) - if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) { - // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); - Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); - return BinaryOperator::CreateAdd(Add, C1C2); - - } - - // Try to fold constant mul into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castNegVal(Op1)) - return BinaryOperator::CreateMul(Op0v, Op1v); - - // (X / Y) * Y = X - (X % Y) - // (X / Y) * -Y = (X % Y) - X - { - Value *Op1C = Op1; - BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); - if (!BO || - (BO->getOpcode() != Instruction::UDiv && - BO->getOpcode() != Instruction::SDiv)) { - Op1C = Op0; - BO = dyn_cast<BinaryOperator>(Op1); - } - Value *Neg = dyn_castNegVal(Op1C); - if (BO && BO->hasOneUse() && - (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && - (BO->getOpcode() == Instruction::UDiv || - BO->getOpcode() == Instruction::SDiv)) { - Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - - // If the division is exact, X % Y is zero. - if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) - if (SDiv->isExact()) { - if (Op1BO == Op1C) - return ReplaceInstUsesWith(I, Op0BO); - return BinaryOperator::CreateNeg(Op0BO); - } - - Value *Rem; - if (BO->getOpcode() == Instruction::UDiv) - Rem = Builder->CreateURem(Op0BO, Op1BO); - else - Rem = Builder->CreateSRem(Op0BO, Op1BO); - Rem->takeName(BO); - - if (Op1BO == Op1C) - return BinaryOperator::CreateSub(Op0BO, Rem); - return BinaryOperator::CreateSub(Rem, Op0BO); - } - } - - /// i1 mul -> i1 and. 
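
The "(X / Y) * Y = X - (X % Y)" rewrite above is the division algorithm rearranged: C++ guarantees (x/y)*y + x%y == x for every non-zero y, for signed (truncating) and unsigned division alike. The "i1 mul -> i1 and" note holds because for bits a, b in {0,1}, a*b and a&b coincide. A standalone check of both (illustrative only):

#include <cassert>

int main() {
  for (int x = -50; x <= 50; ++x)
    for (int y = -5; y <= 5; ++y)
      if (y != 0)
        assert((x / y) * y == x - (x % y)); // division algorithm, rearranged
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b)
      assert(a * b == (a & b));             // i1 mul -> i1 and
  return 0;
}
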
- if (I.getType() == Type::getInt1Ty(*Context)) - return BinaryOperator::CreateAnd(Op0, Op1); - - // X*(1 << Y) --> X << Y - // (1 << Y)*X --> X << Y - { - Value *Y; - if (match(Op0, m_Shl(m_One(), m_Value(Y)))) - return BinaryOperator::CreateShl(Op1, Y); - if (match(Op1, m_Shl(m_One(), m_Value(Y)))) - return BinaryOperator::CreateShl(Op0, Y); - } - - // If one of the operands of the multiply is a cast from a boolean value, then - // we know the bool is either zero or one, so this is a 'masking' multiply. - // X * Y (where Y is 0 or 1) -> X & (0-Y) - if (!isa<VectorType>(I.getType())) { - // -2 is "-1 << 1" so it is all bits set except the low one. - APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - - Value *BoolCast = 0, *OtherOp = 0; - if (MaskedValueIsZero(Op0, Negative2)) - BoolCast = Op0, OtherOp = Op1; - else if (MaskedValueIsZero(Op1, Negative2)) - BoolCast = Op1, OtherOp = Op0; - - if (BoolCast) { - Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), - BoolCast, "tmp"); - return BinaryOperator::CreateAnd(V, OtherOp); - } - } - - return Changed ? &I : 0; -} - -Instruction *InstCombiner::visitFMul(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Simplify mul instructions with a constant RHS... - if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { - // "In IEEE floating point, x*1 is not equivalent to x for nans. However, - // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' - } else if (isa<VectorType>(Op1C->getType())) { - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { - // As above, vector X*splat(1.0) -> X in all defined cases. - if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); - } - } - } - - // Try to fold constant mul into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFMul(Op0v, Op1v); - - return Changed ? &I : 0; -} - -/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select -/// instruction. -bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { - SelectInst *SI = cast<SelectInst>(I.getOperand(1)); - - // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y - int NonNullOperand = -1; - if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) - if (ST->isNullValue()) - NonNullOperand = 2; - // div/rem X, (Cond ? Y : 0) -> div/rem X, Y - if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) - if (ST->isNullValue()) - NonNullOperand = 1; - - if (NonNullOperand == -1) - return false; - - Value *SelectCond = SI->getOperand(0); - - // Change the div/rem to use 'Y' instead of the select. - I.setOperand(1, SI->getOperand(NonNullOperand)); - - // Okay, we know we replace the operand of the div/rem with 'Y' with no - // problem. However, the select, or the condition of the select may have - // multiple uses. 
Based on our knowledge that the operand must be non-zero,
- // propagate the known value for the select into other uses of it, and
- // propagate a known value of the condition into its other users.
-
- // If the select and condition only have a single use, don't bother with this,
- // early exit.
- if (SI->use_empty() && SelectCond->hasOneUse())
- return true;
-
- // Scan the current block backward, looking for other uses of SI.
- BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-
- while (BBI != BBFront) {
- --BBI;
- // If we found a call to a function, we can't assume it will return, so
- // information from below it cannot be propagated above it.
- if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
- break;
-
- // Replace uses of the select or its condition with the known values.
- for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
- I != E; ++I) {
- if (*I == SI) {
- *I = SI->getOperand(NonNullOperand);
- Worklist.Add(BBI);
- } else if (*I == SelectCond) {
- *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) :
- ConstantInt::getFalse(*Context);
- Worklist.Add(BBI);
- }
- }
-
- // If we passed the instruction, quit looking for it.
- if (&*BBI == SI)
- SI = 0;
- if (&*BBI == SelectCond)
- SelectCond = 0;
-
- // If we ran out of things to eliminate, break out of the loop.
- if (SelectCond == 0 && SI == 0)
- break;
-
- }
- return true;
-}
-
-
-/// This function implements the transforms on div instructions that work
-/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
-/// used by the visitors to those instructions.
-/// @brief Transforms common to all three div instructions
-Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // undef / X -> 0 for integer.
- // undef / X -> undef for FP (the undef could be a snan).
- if (isa<UndefValue>(Op0)) {
- if (Op0->getType()->isFPOrFPVector())
- return ReplaceInstUsesWith(I, Op0);
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
- // X / undef -> undef
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1);
-
- return 0;
-}
-
-/// This function implements the transforms common to both integer division
-/// instructions (udiv and sdiv). It is called by the visitors to those integer
-/// division instructions.
-/// @brief Common integer divide transforms
-Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // (sdiv X, X) --> 1 (udiv X, X) --> 1
- if (Op0 == Op1) {
- if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
- std::vector<Constant*> Elts(Ty->getNumElements(), CI);
- return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
- }
-
- Constant *CI = ConstantInt::get(I.getType(), 1);
- return ReplaceInstUsesWith(I, CI);
- }
-
- if (Instruction *Common = commonDivTransforms(I))
- return Common;
-
- // Handle cases involving: [su]div X, (select Cond, Y, Z)
- // This does not apply for fdiv.
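
SimplifyDivRemOfSelect, defined above, is justified by undefined behavior: if the division executes at all, the select cannot have produced the zero arm, so the divisor may be rewritten to Y and the implied truth value of Cond propagated to nearby users. A hedged source-level sketch of the reasoning (names here are hypothetical):

// If this function runs to completion, c must have been false: had c been
// true, the divisor would be 0 and the division undefined. An optimizer may
// therefore treat the body as "x / y" and fold later uses of c to false.
int div_of_select(bool c, int x, int y) {
  return x / (c ? 0 : y);
}
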
- if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
- return &I;
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // div X, 1 == X
- if (RHS->equalsInt(1))
- return ReplaceInstUsesWith(I, Op0);
-
- // (X / C1) / C2 -> X / (C1*C2)
- if (Instruction *LHS = dyn_cast<Instruction>(Op0))
- if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
- if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
- if (MultiplyOverflows(RHS, LHSRHS,
- I.getOpcode()==Instruction::SDiv))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- else
- return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- ConstantExpr::getMul(RHS, LHSRHS));
- }
-
- if (!RHS->isZero()) { // avoid X udiv 0
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
- }
-
- // 0 / X == 0, we don't need to preserve faults!
- if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
- if (LHS->equalsInt(0))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // It can't be division by zero, hence it must be division by one.
- if (I.getType() == Type::getInt1Ty(*Context))
- return ReplaceInstUsesWith(I, Op0);
-
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
- if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
- // div X, 1 == X
- if (X->isOne())
- return ReplaceInstUsesWith(I, Op0);
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // Handle the integer div common cases
- if (Instruction *Common = commonIDivTransforms(I))
- return Common;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
- // X udiv 2^C -> X >> C
- // Check to see if this is an unsigned division with an exact power of 2,
- // if so, convert to a right shift.
- if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
- return BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
-
- // X udiv C, where C >= signbit
- if (C->getValue().isNegative()) {
- Value *IC = Builder->CreateICmpULT( Op0, C);
- return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
- ConstantInt::get(I.getType(), 1));
- }
- }
-
- // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
- if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
- if (C1.isPowerOf2()) {
- Value *N = RHSI->getOperand(1);
- const Type *NTy = N->getType();
- if (uint32_t C2 = C1.logBase2())
- N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
- return BinaryOperator::CreateLShr(Op0, N);
- }
- }
- }
-
- // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, log2(C1)), (shr X, log2(C2))
- // where C1&C2 are powers of two.
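
The power-of-two udiv fold above, the select fold implemented next, and the analogous urem fold later in this hunk all reduce division to shifts or masks. A standalone check of the three identities in unsigned 32-bit arithmetic (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 4096; ++x) {
    assert(x / 8u == x >> 3);    // X udiv 2^C  -->  X >> C
    assert(x % 8u == (x & 7u));  // X urem 2^C  -->  X & (2^C - 1)
    for (int c = 0; c <= 1; ++c) // udiv X, (select Cond, 4, 8)
      assert(x / (c ? 4u : 8u) == (c ? x >> 2 : x >> 3));
  }
  return 0;
}
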
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) { - const APInt &TVA = STO->getValue(), &FVA = SFO->getValue(); - if (TVA.isPowerOf2() && FVA.isPowerOf2()) { - // Compute the shift amounts - uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); - // Construct the "on true" case of the select - Constant *TC = ConstantInt::get(Op0->getType(), TSA); - Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); - - // Construct the "on false" case of the select - Constant *FC = ConstantInt::get(Op0->getType(), FSA); - Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); - - // construct the select instruction and return it. - return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); - } - } - return 0; -} - -Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Handle the integer div common cases - if (Instruction *Common = commonIDivTransforms(I)) - return Common; - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // sdiv X, -1 == -X - if (RHS->isAllOnesValue()) - return BinaryOperator::CreateNeg(Op0); - - // sdiv X, C --> ashr X, log2(C) - if (cast<SDivOperator>(&I)->isExact() && - RHS->getValue().isNonNegative() && - RHS->getValue().isPowerOf2()) { - Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), - RHS->getValue().exactLogBase2()); - return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); - } - - // -X/C --> X/-C provided the negation doesn't overflow. - if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) - if (isa<Constant>(Sub->getOperand(0)) && - cast<Constant>(Sub->getOperand(0))->isNullValue() && - Sub->hasNoSignedWrap()) - return BinaryOperator::CreateSDiv(Sub->getOperand(1), - ConstantExpr::getNeg(RHS)); - } - - // If the sign bits of both operands are zero (i.e. we can prove they are - // unsigned inputs), turn this into a udiv. - if (I.getType()->isInteger()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op0, Mask)) { - if (MaskedValueIsZero(Op1, Mask)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - } - ConstantInt *ShiftedInt; - if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && - ShiftedInt->getValue().isPowerOf2()) { - // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) - // Safe because the only negative value (1 << Y) can take on is - // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have - // the sign bit set. - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { - return commonDivTransforms(I); -} - -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. 
-/// @brief Transforms common to all three rem instructions
-Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (isa<UndefValue>(Op0)) { // undef % X -> 0 for integer
- if (I.getType()->isFPOrFPVector())
- return ReplaceInstUsesWith(I, Op0); // undef % X -> undef for FP (could be SNaN)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
-
- // Handle cases involving: rem X, (select Cond, Y, Z)
- if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
- return &I;
-
- return 0;
-}
-
-/// This function implements the transforms common to both integer remainder
-/// instructions (urem and srem). It is called by the visitors to those integer
-/// remainder instructions.
-/// @brief Common integer remainder transforms
-Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Instruction *common = commonRemTransforms(I))
- return common;
-
- // 0 % X == 0 for integer, we don't need to preserve faults!
- if (Constant *LHS = dyn_cast<Constant>(Op0))
- if (LHS->isNullValue())
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // X % 0 == undef, we don't need to preserve faults!
- if (RHS->equalsInt(0))
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-
- if (RHS->equalsInt(1)) // X % 1 == 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
- if (Instruction *R = FoldOpIntoSelect(I, SI, this))
- return R;
- } else if (isa<PHINode>(Op0I)) {
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
-
- // See if we can fold away this rem instruction.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- }
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitURem(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (Instruction *common = commonIRemTransforms(I))
- return common;
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // X urem 2^C -> X and (2^C - 1)
- // Check to see if this is an unsigned remainder with an exact power of 2,
- // if so, convert to a bitwise and.
- if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue().isPowerOf2())
- return BinaryOperator::CreateAnd(Op0, SubOne(C));
- }
-
- if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
- Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
- return BinaryOperator::CreateAnd(Op0, Add);
- }
- }
- }
-
- // urem X, (select Cond, C1, C2) --> select Cond, (and X, C1-1), (and X, C2-1)
- // where C1&C2 are powers of two.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
- // STO == 0 and SFO == 0 handled above.
- if ((STO->getValue().isPowerOf2()) && - (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), - SI->getName()+".t"); - Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), - SI->getName()+".f"); - return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitSRem(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Handle the integer rem common cases - if (Instruction *Common = commonIRemTransforms(I)) - return Common; - - if (Value *RHSNeg = dyn_castNegVal(Op1)) - if (!isa<Constant>(RHSNeg) || - (isa<ConstantInt>(RHSNeg) && - cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) { - // X % -Y -> X % Y - Worklist.AddValue(I.getOperand(1)); - I.setOperand(1, RHSNeg); - return &I; - } - - // If the sign bits of both operands are zero (i.e. we can prove they are - // unsigned inputs), turn this into a urem. - if (I.getType()->isInteger()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { - // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1, I.getName()); - } - } - - // If it's a constant vector, flip any negative values positive. - if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) { - unsigned VWidth = RHSV->getNumOperands(); - - bool hasNegative = false; - for (unsigned i = 0; !hasNegative && i != VWidth; ++i) - if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) - if (RHS->getValue().isNegative()) - hasNegative = true; - - if (hasNegative) { - std::vector<Constant *> Elts(VWidth); - for (unsigned i = 0; i != VWidth; ++i) { - if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { - if (RHS->getValue().isNegative()) - Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); - else - Elts[i] = RHS; - } - } - - Constant *NewRHSV = ConstantVector::get(Elts); - if (NewRHSV != RHSV) { - Worklist.AddValue(I.getOperand(1)); - I.setOperand(1, NewRHSV); - return &I; - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); -} - -// isOneBitSet - Return true if there is exactly one bit set in the specified -// constant. -static bool isOneBitSet(const ConstantInt *CI) { - return CI->getValue().isPowerOf2(); -} - -// isHighOnes - Return true if the constant is of the form 1+0+. -// This is the same as lowones(~X). -static bool isHighOnes(const ConstantInt *CI) { - return (~CI->getValue() + 1).isPowerOf2(); -} - -/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits -/// are carefully arranged to allow folding of expressions such as: -/// -/// (A < B) | (A > B) --> (A != B) -/// -/// Note that this is only valid if the first and second predicates have the -/// same sign. 
It is illegal to do: (A u< B) | (A s> B)
-///
-/// Three bits are used to represent the condition, as follows:
-/// 0 A > B
-/// 1 A == B
-/// 2 A < B
-///
-/// <=> Value Definition
-/// 000 0 Always false
-/// 001 1 A > B
-/// 010 2 A == B
-/// 011 3 A >= B
-/// 100 4 A < B
-/// 101 5 A != B
-/// 110 6 A <= B
-/// 111 7 Always true
-///
-static unsigned getICmpCode(const ICmpInst *ICI) {
- switch (ICI->getPredicate()) {
- // False -> 0
- case ICmpInst::ICMP_UGT: return 1; // 001
- case ICmpInst::ICMP_SGT: return 1; // 001
- case ICmpInst::ICMP_EQ: return 2; // 010
- case ICmpInst::ICMP_UGE: return 3; // 011
- case ICmpInst::ICMP_SGE: return 3; // 011
- case ICmpInst::ICMP_ULT: return 4; // 100
- case ICmpInst::ICMP_SLT: return 4; // 100
- case ICmpInst::ICMP_NE: return 5; // 101
- case ICmpInst::ICMP_ULE: return 6; // 110
- case ICmpInst::ICMP_SLE: return 6; // 110
- // True -> 7
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- return 0;
- }
-}
-
-/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
-/// predicate into a three bit mask. It also returns whether it is an ordered
-/// predicate by reference.
-static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
- isOrdered = false;
- switch (CC) {
- case FCmpInst::FCMP_ORD: isOrdered = true; return 0; // 000
- case FCmpInst::FCMP_UNO: return 0; // 000
- case FCmpInst::FCMP_OGT: isOrdered = true; return 1; // 001
- case FCmpInst::FCMP_UGT: return 1; // 001
- case FCmpInst::FCMP_OEQ: isOrdered = true; return 2; // 010
- case FCmpInst::FCMP_UEQ: return 2; // 010
- case FCmpInst::FCMP_OGE: isOrdered = true; return 3; // 011
- case FCmpInst::FCMP_UGE: return 3; // 011
- case FCmpInst::FCMP_OLT: isOrdered = true; return 4; // 100
- case FCmpInst::FCMP_ULT: return 4; // 100
- case FCmpInst::FCMP_ONE: isOrdered = true; return 5; // 101
- case FCmpInst::FCMP_UNE: return 5; // 101
- case FCmpInst::FCMP_OLE: isOrdered = true; return 6; // 110
- case FCmpInst::FCMP_ULE: return 6; // 110
- // True -> 7
- default:
- // Not expecting FCMP_FALSE and FCMP_TRUE.
- llvm_unreachable("Unexpected FCmp predicate!");
- return 0;
- }
-}
-
-/// getICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
-/// new ICmp instruction. The sign is passed in to determine which kind
-/// of predicate to use in the new icmp instruction.
-static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,
- LLVMContext *Context) {
- switch (code) {
- default: llvm_unreachable("Illegal ICmp code!");
- case 0: return ConstantInt::getFalse(*Context);
- case 1:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS);
- case 2: return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS);
- case 3:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS);
- case 4:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS);
- case 5: return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS);
- case 6:
- if (sign)
- return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);
- else
- return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS);
- case 7: return ConstantInt::getTrue(*Context);
- }
-}
-
-/// getFCmpValue - This is the complement of getFCmpCode, which turns an
-/// opcode and two operands into an FCmp instruction.
isordered is passed -/// in to determine which kind of predicate to use in the new fcmp instruction. -static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS, LLVMContext *Context) { - switch (code) { - default: llvm_unreachable("Illegal FCmp code!"); - case 0: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); - case 1: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); - case 2: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); - case 3: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); - case 4: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); - case 5: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); - case 6: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(*Context); - } -} - -/// PredicatesFoldable - Return true if both predicates match sign or if at -/// least one of them is an equality comparison (which is signless). -static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { - return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || - (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || - (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); -} - -namespace { -// FoldICmpLogical - Implements (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) -struct FoldICmpLogical { - InstCombiner &IC; - Value *LHS, *RHS; - ICmpInst::Predicate pred; - FoldICmpLogical(InstCombiner &ic, ICmpInst *ICI) - : IC(ic), LHS(ICI->getOperand(0)), RHS(ICI->getOperand(1)), - pred(ICI->getPredicate()) {} - bool shouldApply(Value *V) const { - if (ICmpInst *ICI = dyn_cast<ICmpInst>(V)) - if (PredicatesFoldable(pred, ICI->getPredicate())) - return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) || - (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS)); - return false; - } - Instruction *apply(Instruction &Log) const { - ICmpInst *ICI = cast<ICmpInst>(Log.getOperand(0)); - if (ICI->getOperand(0) != LHS) { - assert(ICI->getOperand(1) == LHS); - ICI->swapOperands(); // Swap the LHS and RHS of the ICmp - } - - ICmpInst *RHSICI = cast<ICmpInst>(Log.getOperand(1)); - unsigned LHSCode = getICmpCode(ICI); - unsigned RHSCode = getICmpCode(RHSICI); - unsigned Code; - switch (Log.getOpcode()) { - case Instruction::And: Code = LHSCode & RHSCode; break; - case Instruction::Or: Code = LHSCode | RHSCode; break; - case Instruction::Xor: Code = LHSCode ^ RHSCode; break; - default: llvm_unreachable("Illegal logical opcode!"); return 0; - } - - bool isSigned = RHSICI->isSigned() || ICI->isSigned(); - Value *RV = getICmpValue(isSigned, Code, LHS, RHS, IC.getContext()); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value... - return IC.ReplaceInstUsesWith(Log, RV); - } -}; -} // end anonymous namespace - -// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where -// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is -// guaranteed to be a binary operator. 
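
The FoldICmpLogical struct above works because getICmpCode maps every predicate onto the three primitive outcomes {greater, equal, less}: once each compare is a three-bit mask, and/or/xor of the compare results is just and/or/xor of the masks, and getICmpValue maps the combined mask back to a predicate. A standalone sketch of that algebra with illustrative names (not the LLVM enums):

#include <cassert>

// Bit 0: a > b, bit 1: a == b, bit 2: a < b -- same layout as getICmpCode.
unsigned cmpCode(int a, int b) { return a > b ? 1u : (a == b ? 2u : 4u); }

int main() {
  for (int a = -3; a <= 3; ++a)
    for (int b = -3; b <= 3; ++b) {
      // (A < B) | (A > B) --> A != B:  100 | 001 == 101, the "!=" mask.
      unsigned mask = 4u | 1u;
      bool folded = (cmpCode(a, b) & mask) != 0;
      assert(folded == ((a < b) || (a > b)));
      assert(folded == (a != b));
    }
  return 0;
}
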
-Instruction *InstCombiner::OptAndOp(Instruction *Op,
- ConstantInt *OpRHS,
- ConstantInt *AndRHS,
- BinaryOperator &TheAnd) {
- Value *X = Op->getOperand(0);
- Constant *Together = 0;
- if (!Op->isShift())
- Together = ConstantExpr::getAnd(AndRHS, OpRHS);
-
- switch (Op->getOpcode()) {
- case Instruction::Xor:
- if (Op->hasOneUse()) {
- // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- Value *And = Builder->CreateAnd(X, AndRHS);
- And->takeName(Op);
- return BinaryOperator::CreateXor(And, Together);
- }
- break;
- case Instruction::Or:
- if (Together == AndRHS) // (X | C) & C --> C
- return ReplaceInstUsesWith(TheAnd, AndRHS);
-
- if (Op->hasOneUse() && Together != OpRHS) {
- // (X | C1) & C2 --> (X | (C1&C2)) & C2
- Value *Or = Builder->CreateOr(X, Together);
- Or->takeName(Op);
- return BinaryOperator::CreateAnd(Or, AndRHS);
- }
- break;
- case Instruction::Add:
- if (Op->hasOneUse()) {
- // Adding a one to a single bit bit-field should be turned into an XOR
- // of the bit. First thing to check is to see if this AND is with a
- // single bit constant.
- const APInt& AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
-
- // If there is only one bit set...
- if (isOneBitSet(cast<ConstantInt>(AndRHS))) {
- // Ok, at this point, we know that we are masking the result of the
- // ADD down to exactly one bit. If the constant we are adding has
- // no bits set below this bit, then we can eliminate the ADD.
- const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
-
- // Check to see if any bits below the one bit set in AndRHSV are set.
- if ((AddRHS & (AndRHSV-1)) == 0) {
- // If not, the only thing that can affect the output of the AND is
- // the bit specified by AndRHSV. If that bit is set, the effect of
- // the XOR is to toggle the bit. If it is clear, then the ADD has
- // no effect.
- if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
- TheAnd.setOperand(0, X);
- return &TheAnd;
- } else {
- // Pull the XOR out of the AND.
- Value *NewAnd = Builder->CreateAnd(X, AndRHS);
- NewAnd->takeName(Op);
- return BinaryOperator::CreateXor(NewAnd, AndRHS);
- }
- }
- }
- }
- break;
-
- case Instruction::Shl: {
- // We know that the AND will not produce any of the bits shifted in, so if
- // the anded constant includes them, clear them now!
- //
- uint32_t BitWidth = AndRHS->getType()->getBitWidth();
- uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
- APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
- ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask);
-
- if (CI->getValue() == ShlMask) {
- // Masking out bits that the shift already masks
- return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
- } else if (CI != AndRHS) { // Reducing bits set in and.
- TheAnd.setOperand(1, CI);
- return &TheAnd;
- }
- break;
- }
- case Instruction::LShr:
- {
- // We know that the AND will not produce any of the bits shifted in, so if
- // the anded constant includes them, clear them now! This only applies to
- // unsigned shifts, because a signed shr may bring in set bits!
- //
- uint32_t BitWidth = AndRHS->getType()->getBitWidth();
- uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
- APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
-
- if (CI->getValue() == ShrMask) {
- // Masking out bits that the shift already masks.
- return ReplaceInstUsesWith(TheAnd, Op);
- } else if (CI != AndRHS) {
- TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
- return &TheAnd;
- }
- break;
- }
- case Instruction::AShr:
- // Signed shr.
- // See if this is shifting in some sign extension, then masking it out
- // with an and.
- if (Op->hasOneUse()) {
- uint32_t BitWidth = AndRHS->getType()->getBitWidth();
- uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
- APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
- Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);
- if (C == AndRHS) { // Masking out bits shifted in.
- // (Val ashr C1) & C2 -> (Val lshr C1) & C2
- // Make the argument unsigned.
- Value *ShVal = Op->getOperand(0);
- ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
- return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
- }
- }
- break;
- }
- return 0;
-}
-
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
-/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
-/// whether to treat V, Lo and Hi as signed or not. IB is the location to
-/// insert new instructions.
-Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
- bool isSigned, bool Inside,
- Instruction &IB) {
- assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
- ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
- "Lo is not <= Hi in range emission code!");
-
- if (Inside) {
- if (Lo == Hi) // Trivially false.
- return new ICmpInst(ICmpInst::ICMP_NE, V, V);
-
- // V >= Min && V < Hi --> V < Hi
- if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
- return new ICmpInst(pred, V, Hi);
- }
-
- // Emit V-Lo <u Hi-Lo
- Constant *NegLo = ConstantExpr::getNeg(Lo);
- Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
- Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
- return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
- }
-
- if (Lo == Hi) // Trivially true.
- return new ICmpInst(ICmpInst::ICMP_EQ, V, V);
-
- // V < Min || V >= Hi -> V > Hi-1
- Hi = SubOne(cast<ConstantInt>(Hi));
- if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
- ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- return new ICmpInst(pred, V, Hi);
- }
-
- // Emit V-Lo >u Hi-1-Lo
- // Note that Hi has already had one subtracted from it, above.
- ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
- Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
- Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
- return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
-}
-
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side. The 1s are allowed to wrap from LSB to
-// MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
-// not, since all 1s are not contiguous.
-static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
- const APInt& V = Val->getValue();
- uint32_t BitWidth = Val->getType()->getBitWidth();
- if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
-
- // look for the first zero bit after the run of ones
- MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
- // look for the first non-zero bit
- ME = V.getActiveBits();
- return true;
-}
-
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub.
If we can fold one of -/// the following xforms: -/// -/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask -/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 -/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 -/// -/// return (A +/- B). -/// -Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, - ConstantInt *Mask, bool isSub, - Instruction &I) { - Instruction *LHSI = dyn_cast<Instruction>(LHS); - if (!LHSI || LHSI->getNumOperands() != 2 || - !isa<ConstantInt>(LHSI->getOperand(1))) return 0; - - ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); - - switch (LHSI->getOpcode()) { - default: return 0; - case Instruction::And: - if (ConstantExpr::getAnd(N, Mask) == Mask) { - // If the AndRHS is a power of two minus one (0+1+), this is simple. - if ((Mask->getValue().countLeadingZeros() + - Mask->getValue().countPopulation()) == - Mask->getValue().getBitWidth()) - break; - - // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+ - // part, we don't need any explicit masks to take them out of A. If that - // is all N is, ignore it. - uint32_t MB = 0, ME = 0; - if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive - uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth(); - APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1)); - if (MaskedValueIsZero(RHS, Mask)) - break; - } - } - return 0; - case Instruction::Or: - case Instruction::Xor: - // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 - if ((Mask->getValue().countLeadingZeros() + - Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() - && ConstantExpr::getAnd(N, Mask)->isNullValue()) - break; - return 0; - } - - if (isSub) - return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); - return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); -} - -/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. -Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { - // (icmp eq A, null) & (icmp eq B, null) --> - // (icmp eq (ptrtoint(A)|ptrtoint(B)), 0) - if (TD && - LHS->getPredicate() == ICmpInst::ICMP_EQ && - RHS->getPredicate() == ICmpInst::ICMP_EQ && - isa<ConstantPointerNull>(LHS->getOperand(1)) && - isa<ConstantPointerNull>(RHS->getOperand(1))) { - const Type *IntPtrTy = TD->getIntPtrType(I.getContext()); - Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy); - Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy); - Value *NewOr = Builder->CreateOr(A, B); - return new ICmpInst(ICmpInst::ICMP_EQ, NewOr, - Constant::getNullValue(IntPtrTy)); - } - - Value *Val, *Val2; - ConstantInt *LHSCst, *RHSCst; - ICmpInst::Predicate LHSCC, RHSCC; - - // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
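
Several of the two-compare folds below compress a pair of compares into one by re-basing the value, the same trick InsertRangeTest above uses: lo <= v && v < hi becomes the single unsigned compare (v - lo) <u (hi - lo), and likewise X != 13 & X != 14 becomes X - 13 >u 1, with unsigned wraparound doing the work. A standalone check (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t lo = 10, hi = 20;
  for (uint32_t v = 0; v < 100; ++v) {
    // Range test as one subtract plus one unsigned compare.
    assert((lo <= v && v < hi) == (v - lo < hi - lo));
    // (X != 13 & X != 14) --> X - 13 >u 1, as in the ICMP_NE case below.
    assert((v != 13 && v != 14) == (v - 13u > 1u));
  }
  return 0;
}
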
-  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val),
-                         m_ConstantInt(LHSCst))) ||
-      !match(RHS, m_ICmp(RHSCC, m_Value(Val2),
-                         m_ConstantInt(RHSCst))))
-    return 0;
-
-  if (LHSCst == RHSCst && LHSCC == RHSCC) {
-    // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
-    // where C is a power of 2
-    if (LHSCC == ICmpInst::ICMP_ULT &&
-        LHSCst->getValue().isPowerOf2()) {
-      Value *NewOr = Builder->CreateOr(Val, Val2);
-      return new ICmpInst(LHSCC, NewOr, LHSCst);
-    }
-
-    // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
-    if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
-      Value *NewOr = Builder->CreateOr(Val, Val2);
-      return new ICmpInst(LHSCC, NewOr, LHSCst);
-    }
-  }
-
-  // From here on, we only handle:
-  // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
-  if (Val != Val2) return 0;
-
-  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
-  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
-      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
-      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
-      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
-    return 0;
-
-  // We can't fold (ugt x, C) & (sgt x, C2).
-  if (!PredicatesFoldable(LHSCC, RHSCC))
-    return 0;
-
-  // Ensure that the larger constant is on the RHS.
-  bool ShouldSwap;
-  if (CmpInst::isSigned(LHSCC) ||
-      (ICmpInst::isEquality(LHSCC) &&
-       CmpInst::isSigned(RHSCC)))
-    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
-  else
-    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
-  if (ShouldSwap) {
-    std::swap(LHS, RHS);
-    std::swap(LHSCst, RHSCst);
-    std::swap(LHSCC, RHSCC);
-  }
-
-  // At this point, we know we have two icmp instructions
-  // comparing a value against two constants and and'ing the result
-  // together. Because of the above check, we know that we only have
-  // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here.
We also know - // (from the FoldICmpLogical check above), that the two constants - // are not equal and that the larger constant is on the RHS - assert(LHSCst != RHSCst && "Compares not folded above?"); - - switch (LHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false - case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 - case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 - case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return ReplaceInstUsesWith(I, LHS); - } - case ICmpInst::ICMP_NE: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_ULT: - if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); - break; // (X != 13 & X u< 15) -> no change - case ICmpInst::ICMP_SLT: - if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); - break; // (X != 13 & X s< 15) -> no change - case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 - case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_NE: - if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 - Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, - ConstantInt::get(Add->getType(), 1)); - } - break; // (X != 13 & X != 15) -> no change - } - break; - case ICmpInst::ICMP_ULT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 - case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SLT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false - case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 - case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_UGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst)) 
// (X u> 13 & X != 14) -> X u> 14 - return new ICmpInst(LHSCC, Val, RHSCst); - break; // (X u> 13 & X != 15) -> no change - case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, false, true, I); - case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 - return new ICmpInst(LHSCC, Val, RHSCst); - break; // (X s> 13 & X != 15) -> no change - case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, true, true, I); - case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change - break; - } - break; - } - - return 0; -} - -Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { - - if (LHS->getPredicate() == FCmpInst::FCMP_ORD && - RHS->getPredicate() == FCmpInst::FCMP_ORD) { - // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) - if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // false. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); - } - - // Handle vector zeros. This occurs because the canonical form of - // "fcmp ord x,x" is "fcmp ord x, 0". - if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && - isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); - return 0; - } - - Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); - Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); - FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); - - - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
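
Before the predicate bookkeeping below, it may help to see why two fcmps of the same operands fold so mechanically: a floating-point predicate is just a subset of the four mutually exclusive outcomes {lt, gt, eq, unordered}, so and'ing two compares intersects the subsets. The sketch below only illustrates that idea; the actual bit encoding used by getFCmpCode in this pass is different, and every name here is invented:

    #include <cassert>
    #include <cmath>

    // Outcome bits for one (x, y) pair: exactly one of these is true.
    enum : unsigned { LT = 1, EQ = 2, GT = 4, UNO = 8 };

    static unsigned outcome(double x, double y) {
      if (std::isnan(x) || std::isnan(y)) return UNO;
      if (x < y) return LT;
      if (x > y) return GT;
      return EQ;
    }

    // A predicate holds iff the actual outcome is in its subset.
    static bool holds(unsigned pred, double x, double y) {
      return (pred & outcome(x, y)) != 0;
    }

    int main() {
      const unsigned OLT = LT, ULE = LT | EQ | UNO;
      const double xs[] = {1.0, 2.0, 2.0, std::nan("")};
      const double ys[] = {2.0, 1.0, 2.0, 1.0};
      for (int i = 0; i != 4; ++i) {
        bool both = holds(OLT, xs[i], ys[i]) && holds(ULE, xs[i], ys[i]);
        assert(both == holds(OLT & ULE, xs[i], ys[i])); // OLT & ULE == OLT
      }
      return 0;
    }
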
- if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - - if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, RHS); - if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, LHS); - - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op1Pred == 0) { - std::swap(LHS, RHS); - std::swap(Op0Pred, Op1Pred); - std::swap(Op0Ordered, Op1Ordered); - } - if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq - // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, RHS); - - // uno && oeq -> uno && (ord && eq) -> false - // uno && ord -> false - if (!Op0Ordered) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - // ord && ueq -> ord && (uno || eq) -> oeq - return cast<Instruction>(getFCmpValue(true, Op1Pred, - Op0LHS, Op0RHS, Context)); - } - } - - return 0; -} - - -Instruction *InstCombiner::visitAnd(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyAndInst(Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - - if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { - const APInt &AndRHSMask = AndRHS->getValue(); - APInt NotAndRHS(~AndRHSMask); - - // Optimize a variety of ((val OP C1) & C2) combinations... - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - Value *Op0LHS = Op0I->getOperand(0); - Value *Op0RHS = Op0I->getOperand(1); - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Xor: - case Instruction::Or: - // If the mask is only needed on one incoming arm, push it up. - if (!Op0I->hasOneUse()) break; - - if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { - // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); - return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); - } - if (!isa<Constant>(Op0RHS) && - MaskedValueIsZero(Op0RHS, NotAndRHS)) { - // Not masking anything out for the RHS, move to LHS. - Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); - return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); - } - - break; - case Instruction::Add: - // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS. - // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0 - if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I)) - return BinaryOperator::CreateAnd(V, AndRHS); - if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I)) - return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes - break; - - case Instruction::Sub: - // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS. 
- // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0 - if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I)) - return BinaryOperator::CreateAnd(V, AndRHS); - - // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS - // has 1's for all bits that the subtraction with A might affect. - if (Op0I->hasOneUse()) { - uint32_t BitWidth = AndRHSMask.getBitWidth(); - uint32_t Zeros = AndRHSMask.countLeadingZeros(); - APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros); - - ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS); - if (!(A && A->isZero()) && // avoid infinite recursion. - MaskedValueIsZero(Op0LHS, Mask)) { - Value *NewNeg = Builder->CreateNeg(Op0RHS); - return BinaryOperator::CreateAnd(NewNeg, AndRHS); - } - } - break; - - case Instruction::Shl: - case Instruction::LShr: - // (1 << x) & 1 --> zext(x == 0) - // (1 >> x) & 1 --> zext(x == 0) - if (AndRHSMask == 1 && Op0LHS == AndRHS) { - Value *NewICmp = - Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); - return new ZExtInst(NewICmp, I.getType()); - } - break; - } - - if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) - if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) - return Res; - } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) { - // If this is an integer truncation or change from signed-to-unsigned, and - // if the source is an and/or with immediate, transform it. This - // frequently occurs for bitfield accesses. - if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) { - if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) && - CastOp->getNumOperands() == 2) - if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){ - if (CastOp->getOpcode() == Instruction::And) { - // Change: and (cast (and X, C1) to T), C2 - // into : and (cast X to T), trunc_or_bitcast(C1)&C2 - // This will fold the two constants together, which may allow - // other simplifications. - Value *NewCast = Builder->CreateTruncOrBitCast( - CastOp->getOperand(0), I.getType(), - CastOp->getName()+".shrunk"); - // trunc_or_bitcast(C1)&C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - C3 = ConstantExpr::getAnd(C3, AndRHS); - return BinaryOperator::CreateAnd(NewCast, C3); - } else if (CastOp->getOpcode() == Instruction::Or) { - // Change: and (cast (or X, C1) to T), C2 - // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 - Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); - if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) - // trunc(C1)&C2 - return ReplaceInstUsesWith(I, AndRHS); - } - } - } - } - - // Try to fold constant and into select arguments. 
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - - // (~A & ~B) == (~(A | B)) - De Morgan's Law - if (Value *Op0NotVal = dyn_castNotVal(Op0)) - if (Value *Op1NotVal = dyn_castNotVal(Op1)) - if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(Or); - } - - { - Value *A = 0, *B = 0, *C = 0, *D = 0; - // (A|B) & ~(A&B) -> A^B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - // ~(A&B) & (A|B) -> A^B - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - if (Op0->hasOneUse() && - match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1) { // (A^B)&A -> A&(A^B) - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } else if (B == Op1) { // (A^B)&B -> B&(B^A) - cast<BinaryOperator>(Op0)->swapOperands(); - I.swapOperands(); // Simplify below - std::swap(Op0, Op1); - } - } - - if (Op1->hasOneUse() && - match(Op1, m_Xor(m_Value(A), m_Value(B)))) { - if (B == Op0) { // B&(A^B) -> B&(B^A) - cast<BinaryOperator>(Op1)->swapOperands(); - std::swap(A, B); - } - if (A == Op0) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); - } - - // (A&((~A)|B)) -> A&B - if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || - match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) - return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || - match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) - return BinaryOperator::CreateAnd(A, Op0); - } - - if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) { - // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) - if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) - return Res; - } - - // fold (and (cast A), (cast B)) -> (cast (and A, B)) - if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - - // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. 
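
The comment just above states the identity the next hunk implements; it holds for shl, lshr and ashr alike, because an identical shift moves both operands' bits the same way. A tiny standalone check for the unsigned case (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X != 256; ++X)
        for (uint32_t Y = 0; Y != 256; ++Y)
          for (uint32_t Z = 0; Z != 8; ++Z)
            assert(((X >> Z) & (Y >> Z)) == ((X & Y) >> Z));
      return 0;
    }
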
-  if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
-    if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
-      if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
-          SI0->getOperand(1) == SI1->getOperand(1) &&
-          (SI0->hasOneUse() || SI1->hasOneUse())) {
-        Value *NewOp =
-          Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
-                             SI0->getName());
-        return BinaryOperator::Create(SI1->getOpcode(), NewOp,
-                                      SI1->getOperand(1));
-      }
-  }
-
-  // If and'ing two fcmp, try to combine them into one.
-  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
-    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
-      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS))
-        return Res;
-  }
-
-  return Changed ? &I : 0;
-}
-
-/// CollectBSwapParts - Analyze the specified subexpression and see if it is
-/// capable of providing pieces of a bswap. The subexpression provides pieces
-/// of a bswap if it is proven that each of the non-zero bytes in the output of
-/// the expression came from the corresponding "byte swapped" byte in some other
-/// value. For example, if the current subexpression is "(shl i32 %X, 24)" then
-/// we know that the expression deposits the low byte of %X into the high byte
-/// of the bswap result and that all other bytes are zero. If this expression
-/// is accepted, the high byte of ByteValues is set to X to indicate a correct
-/// match.
-///
-/// This function returns true if the match was unsuccessful and false if it
-/// succeeded. On entry to the function the "OverallLeftShift" is a signed
-/// integer value indicating the number of bytes that the subexpression is
-/// later shifted. For example, if the expression is later right shifted by
-/// 16 bits, the OverallLeftShift value would be -2 on entry. This is used to
-/// specify which byte of ByteValues is actually being set.
-///
-/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
-/// byte is masked to zero by a user. For example, in (X & 255), X will be
-/// processed with a bytemask of 1. Because bytemask is 32-bits, this limits
-/// this function to working on up to 32-byte (256 bit) values. ByteMask is
-/// always in the local (OverallLeftShift) coordinate space.
-///
-static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
-                              SmallVector<Value*, 8> &ByteValues) {
-  if (Instruction *I = dyn_cast<Instruction>(V)) {
-    // If this is an or instruction, it may be an inner node of the bswap.
-    if (I->getOpcode() == Instruction::Or) {
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues) ||
-             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
-
-    // If this is a logical shift by a constant multiple of 8, recurse with
-    // OverallLeftShift and ByteMask adjusted.
-    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
-      unsigned ShAmt =
-        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
-      // Ensure the shift amount is defined and of a byte value.
-      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
-        return true;
-
-      unsigned ByteShift = ShAmt >> 3;
-      if (I->getOpcode() == Instruction::Shl) {
-        // X << 2 -> collect(X, +2)
-        OverallLeftShift += ByteShift;
-        ByteMask >>= ByteShift;
-      } else {
-        // X >>u 2 -> collect(X, -2)
-        OverallLeftShift -= ByteShift;
-        ByteMask <<= ByteShift;
-        ByteMask &= (~0U >> (32-ByteValues.size()));
-      }
-
-      if (OverallLeftShift >= (int)ByteValues.size()) return true;
-      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
-
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
-
-    // If this is a logical 'and' with a mask that clears bytes, clear the
-    // corresponding bytes in ByteMask.
-    if (I->getOpcode() == Instruction::And &&
-        isa<ConstantInt>(I->getOperand(1))) {
-      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
-      unsigned NumBytes = ByteValues.size();
-      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
-      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
-
-      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
-        // If this byte is masked out by a later operation, we don't care what
-        // the and mask is.
-        if ((ByteMask & (1 << i)) == 0)
-          continue;
-
-        // If the AndMask is all zeros for this byte, clear the bit.
-        APInt MaskB = AndMask & Byte;
-        if (MaskB == 0) {
-          ByteMask &= ~(1U << i);
-          continue;
-        }
-
-        // If the AndMask is not all ones for this byte, it's not a bytezap.
-        if (MaskB != Byte)
-          return true;
-
-        // Otherwise, this byte is kept.
-      }
-
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
-  }
-
-  // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
-  // the input value to the bswap. Some observations: 1) if more than one byte
-  // is demanded from this input, then it could not be successfully assembled
-  // into a byteswap. At least one of the two bytes would not be aligned with
-  // their ultimate destination.
-  if (!isPowerOf2_32(ByteMask)) return true;
-  unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
-
-  // 2) The input and ultimate destinations must line up: if byte 3 of an i32
-  // is demanded, it needs to go into byte 0 of the result. This means that the
-  // byte needs to be shifted until it lands in the right byte bucket. The
-  // shift amount depends on the position: if the byte is coming from the high
-  // part of the value (e.g. byte 3) then it must be shifted right. If from the
-  // low part, it must be shifted left.
-  unsigned DestByteNo = InputByteNo + OverallLeftShift;
-  if (ByteValues.size()-1-DestByteNo != InputByteNo)
-    return true;
-
-  // If the destination byte value is already defined, the values are or'd
-  // together, which isn't a bswap (unless it's an or of the same bits).
-  if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
-    return true;
-  ByteValues[DestByteNo] = V;
-  return false;
-}
-
-/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
-/// If so, insert the new bswap intrinsic and return it.
-Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
-  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
-  if (!ITy || ITy->getBitWidth() % 16 ||
-      // ByteMask only allows up to 32-byte values.
-      ITy->getBitWidth() > 32*8)
-    return 0;   // Can only bswap pairs of bytes. Can't do vectors.
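
For concreteness, this is the i32 shape of the idiom MatchBSwap is built to recognize, written as plain C++; the pass would collapse the whole expression tree into one llvm.bswap.i32 call. A standalone sketch, with the helper name invented for the example:

    #include <cassert>
    #include <cstdint>

    static uint32_t bswapIdiom(uint32_t X) {
      return (X >> 24) | ((X >> 8) & 0x0000FF00u) |
             ((X << 8) & 0x00FF0000u) | (X << 24);
    }

    int main() {
      assert(bswapIdiom(0x11223344u) == 0x44332211u);
      assert(bswapIdiom(0xDEADBEEFu) == 0xEFBEADDEu);
      return 0;
    }
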
-
-  /// ByteValues - For each byte of the result, we keep track of which value
-  /// defines each byte.
-  SmallVector<Value*, 8> ByteValues;
-  ByteValues.resize(ITy->getBitWidth()/8);
-
-  // Try to find all the pieces corresponding to the bswap.
-  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
-  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
-    return 0;
-
-  // Check to see if all of the bytes come from the same value.
-  Value *V = ByteValues[0];
-  if (V == 0) return 0;  // Didn't find a byte? Must be zero.
-
-  // Check to make sure that all of the bytes come from the same value.
-  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
-    if (ByteValues[i] != V)
-      return 0;
-  const Type *Tys[] = { ITy };
-  Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
-  return CallInst::Create(F, V);
-}
-
-/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D). Check
-/// if A is (cond?-1:0) and either B or D is ~(cond?-1:0) or (cond?0:-1), in
-/// which case we can simplify the expression to "cond ? C : D" or
-/// "cond ? C : B".
-static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
-                                         Value *C, Value *D,
-                                         LLVMContext *Context) {
-  // If A is not a select of -1/0, this cannot match.
-  Value *Cond = 0;
-  if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
-    return 0;
-
-  // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
-  if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
-    return SelectInst::Create(Cond, C, B);
-  if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
-    return SelectInst::Create(Cond, C, B);
-  // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
-  if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
-    return SelectInst::Create(Cond, C, D);
-  if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
-    return SelectInst::Create(Cond, C, D);
-  return 0;
-}
-
-/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
-Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
-                                         ICmpInst *LHS, ICmpInst *RHS) {
-  // (icmp ne A, null) | (icmp ne B, null) -->
-  //    (icmp ne (ptrtoint(A)|ptrtoint(B)), 0)
-  if (TD &&
-      LHS->getPredicate() == ICmpInst::ICMP_NE &&
-      RHS->getPredicate() == ICmpInst::ICMP_NE &&
-      isa<ConstantPointerNull>(LHS->getOperand(1)) &&
-      isa<ConstantPointerNull>(RHS->getOperand(1))) {
-    const Type *IntPtrTy = TD->getIntPtrType(I.getContext());
-    Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy);
-    Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy);
-    Value *NewOr = Builder->CreateOr(A, B);
-    return new ICmpInst(ICmpInst::ICMP_NE, NewOr,
-                        Constant::getNullValue(IntPtrTy));
-  }
-
-  Value *Val, *Val2;
-  ConstantInt *LHSCst, *RHSCst;
-  ICmpInst::Predicate LHSCC, RHSCC;
-
-  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
-  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) ||
-      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst))))
-    return 0;
-
-
-  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
-  if (LHSCst == RHSCst && LHSCC == RHSCC &&
-      LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
-    Value *NewOr = Builder->CreateOr(Val, Val2);
-    return new ICmpInst(LHSCC, NewOr, LHSCst);
-  }
-
-  // From here on, we only handle:
-  // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
-  if (Val != Val2) return 0;
-
-  // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
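
Stepping back to MatchSelectFromAndOr above: the whole pattern rests on the fact that masking with a cond ? -1 : 0 value and its complement is just a select in disguise. A standalone check of that identity (constants arbitrary, names invented):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 0x1234, D = 0xBEEF;
      for (int cond = 0; cond != 2; ++cond) {
        uint32_t A = cond ? ~0u : 0u;              // cond ? -1 : 0
        assert(((A & C) | (~A & D)) == (cond ? C : D));
      }
      return 0;
    }
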
-  if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
-      RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
-      LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
-      RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
-    return 0;
-
-  // We can't fold (ugt x, C) | (sgt x, C2).
-  if (!PredicatesFoldable(LHSCC, RHSCC))
-    return 0;
-
-  // Ensure that the larger constant is on the RHS.
-  bool ShouldSwap;
-  if (CmpInst::isSigned(LHSCC) ||
-      (ICmpInst::isEquality(LHSCC) &&
-       CmpInst::isSigned(RHSCC)))
-    ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
-  else
-    ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
-  if (ShouldSwap) {
-    std::swap(LHS, RHS);
-    std::swap(LHSCst, RHSCst);
-    std::swap(LHSCC, RHSCC);
-  }
-
-  // At this point, we know we have two icmp instructions
-  // comparing a value against two constants and or'ing the result
-  // together. Because of the above check, we know that we only have
-  // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
-  // FoldICmpLogical check above), that the two constants are not
-  // equal.
-  assert(LHSCst != RHSCst && "Compares not folded above?");
-
-  switch (LHSCC) {
-  default: llvm_unreachable("Unknown integer condition code!");
-  case ICmpInst::ICMP_EQ:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:
-      if (LHSCst == SubOne(RHSCst)) {
-        // (X == 13 | X == 14) -> X-13 <u 2
-        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
-        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
-        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
-        return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
-      }
-      break;                  // (X == 13 | X == 15) -> no change
-    case ICmpInst::ICMP_UGT:  // (X == 13 | X u> 14) -> no change
-    case ICmpInst::ICMP_SGT:  // (X == 13 | X s> 14) -> no change
-      break;
-    case ICmpInst::ICMP_NE:   // (X == 13 | X != 15) -> X != 15
-    case ICmpInst::ICMP_ULT:  // (X == 13 | X u< 15) -> X u< 15
-    case ICmpInst::ICMP_SLT:  // (X == 13 | X s< 15) -> X s< 15
-      return ReplaceInstUsesWith(I, RHS);
-    }
-    break;
-  case ICmpInst::ICMP_NE:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:   // (X != 13 | X == 15) -> X != 13
-    case ICmpInst::ICMP_UGT:  // (X != 13 | X u> 15) -> X != 13
-    case ICmpInst::ICMP_SGT:  // (X != 13 | X s> 15) -> X != 13
-      return ReplaceInstUsesWith(I, LHS);
-    case ICmpInst::ICMP_NE:   // (X != 13 | X != 15) -> true
-    case ICmpInst::ICMP_ULT:  // (X != 13 | X u< 15) -> true
-    case ICmpInst::ICMP_SLT:  // (X != 13 | X s< 15) -> true
-      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-    }
-    break;
-  case ICmpInst::ICMP_ULT:
-    switch (RHSCC) {
-    default: llvm_unreachable("Unknown integer condition code!");
-    case ICmpInst::ICMP_EQ:   // (X u< 13 | X == 14) -> no change
-      break;
-    case ICmpInst::ICMP_UGT:  // (X u< 13 | X u> 15) -> (X-13) u> 2
-      // If RHSCst is [us]MAXINT, it is always false. Not handling
-      // this can cause overflow.
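
One arm of the switch above in miniature: two adjacent equalities collapse into a single unsigned range check, again via wraparound. A standalone spot-check (the overflow guard described in the comment above resumes in the next hunk):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X != 64; ++X)
        // (X == 13 | X == 14)  ->  (X - 13) u< 2
        assert(((X - 13) < 2) == (X == 13 || X == 14));
      return 0;
    }
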
- if (RHSCst->isMaxValue(false)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - false, false, I); - case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SLT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change - break; - case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 - // If RHSCst is [us]MAXINT, it is always false. Not handling - // this can cause overflow. - if (RHSCst->isMaxValue(true)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - true, false, I); - case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); - case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_UGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 - case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true - case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change - break; - } - break; - case ICmpInst::ICMP_SGT: - switch (RHSCC) { - default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 - case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 - return ReplaceInstUsesWith(I, LHS); - case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change - break; - case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true - case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change - break; - } - break; - } - return 0; -} - -Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { - if (LHS->getPredicate() == FCmpInst::FCMP_UNO && - RHS->getPredicate() == FCmpInst::FCMP_UNO && - LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { - if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // true. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - - // Otherwise, no need to compare the two constants, compare the - // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); - } - - // Handle vector zeros. This occurs because the canonical form of - // "fcmp uno x,x" is "fcmp uno x, 0". 
- if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && - isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); - - return 0; - } - - Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); - Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); - FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); - - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). - if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, - Op0LHS, Op0RHS); - if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, RHS); - if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, LHS); - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op0Ordered == Op1Ordered) { - // If both are ordered or unordered, return a new fcmp with - // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, - Op0LHS, Op0RHS, Context); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value... - return ReplaceInstUsesWith(I, RV); - } - } - return 0; -} - -/// FoldOrWithConstants - This helper function folds: -/// -/// ((A | B) & C1) | (B & C2) -/// -/// into: -/// -/// (A & C1) | B -/// -/// when the XOR of the two constants is "all ones" (-1). -Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C) { - ConstantInt *CI1 = dyn_cast<ConstantInt>(C); - if (!CI1) return 0; - - Value *V1 = 0; - ConstantInt *CI2 = 0; - if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; - - APInt Xor = CI1->getValue() ^ CI2->getValue(); - if (!Xor.isAllOnesValue()) return 0; - - if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); - return BinaryOperator::CreateOr(NewOp, V1); - } - - return 0; -} - -Instruction *InstCombiner::visitOr(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyOrInst(Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - ConstantInt *C1 = 0; Value *X = 0; - // (X & C1) | C2 --> (X | C2) & (C1|C2) - if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && - isOnlyUse(Op0)) { - Value *Or = Builder->CreateOr(X, RHS); - Or->takeName(Op0); - return BinaryOperator::CreateAnd(Or, - ConstantInt::get(*Context, RHS->getValue() | C1->getValue())); - } - - // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && - isOnlyUse(Op0)) { - Value *Or = Builder->CreateOr(X, RHS); - Or->takeName(Op0); - return BinaryOperator::CreateXor(Or, - ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue())); - } - - // Try to fold constant and into select arguments. 
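
The (X & C1) | C2 rewrite in visitOr above is a pure bit identity: where C2 has a 1 both sides are 1, and where C2 has a 0 both sides reduce to X & C1. A standalone spot-check with arbitrarily chosen constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 0x0F0F, C2 = 0x00FF;
      for (uint32_t X = 0; X != (1u << 16); ++X)
        assert(((X & C1) | C2) == ((X | C2) & (C1 | C2)));
      return 0;
    }
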
-    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
-      if (Instruction *R = FoldOpIntoSelect(I, SI, this))
-        return R;
-    if (isa<PHINode>(Op0))
-      if (Instruction *NV = FoldOpIntoPhi(I))
-        return NV;
-  }
-
-  Value *A = 0, *B = 0;
-  ConstantInt *C1 = 0, *C2 = 0;
-
-  // (A | B) | C  and  A | (B | C)                  -> bswap if possible.
-  // (A >> B) | (C << D)  and  (A << B) | (C >> D)  -> bswap if possible.
-  if (match(Op0, m_Or(m_Value(), m_Value())) ||
-      match(Op1, m_Or(m_Value(), m_Value())) ||
-      (match(Op0, m_Shift(m_Value(), m_Value())) &&
-       match(Op1, m_Shift(m_Value(), m_Value())))) {
-    if (Instruction *BSwap = MatchBSwap(I))
-      return BSwap;
-  }
-
-  // (X^C)|Y -> (X|Y)^C iff Y&C == 0
-  if (Op0->hasOneUse() &&
-      match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
-      MaskedValueIsZero(Op1, C1->getValue())) {
-    Value *NOr = Builder->CreateOr(A, Op1);
-    NOr->takeName(Op0);
-    return BinaryOperator::CreateXor(NOr, C1);
-  }
-
-  // Y|(X^C) -> (X|Y)^C iff Y&C == 0
-  if (Op1->hasOneUse() &&
-      match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
-      MaskedValueIsZero(Op0, C1->getValue())) {
-    Value *NOr = Builder->CreateOr(A, Op0);
-    NOr->takeName(Op0);
-    return BinaryOperator::CreateXor(NOr, C1);
-  }
-
-  // (A & C)|(B & D)
-  Value *C = 0, *D = 0;
-  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
-      match(Op1, m_And(m_Value(B), m_Value(D)))) {
-    Value *V1 = 0, *V2 = 0, *V3 = 0;
-    C1 = dyn_cast<ConstantInt>(C);
-    C2 = dyn_cast<ConstantInt>(D);
-    if (C1 && C2) {  // (A & C1)|(B & C2)
-      // If we have: ((V + N) & C1) | (V & C2)
-      // ... and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
-      // replace with V+N.
-      if (C1->getValue() == ~C2->getValue()) {
-        if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
-            match(A, m_Add(m_Value(V1), m_Value(V2)))) {
-          // Add commutes, try both ways.
-          if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
-            return ReplaceInstUsesWith(I, A);
-          if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
-            return ReplaceInstUsesWith(I, A);
-        }
-        // Or commutes, try both ways.
-        if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
-            match(B, m_Add(m_Value(V1), m_Value(V2)))) {
-          // Add commutes, try both ways.
-          if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
-            return ReplaceInstUsesWith(I, B);
-          if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
-            return ReplaceInstUsesWith(I, B);
-        }
-      }
-      V1 = 0; V2 = 0; V3 = 0;
-    }
-
-    // Check to see if we have any common things being and'ed. If so, find the
-    // terms for V1 & (V2|V3).
-    if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
-      if (A == B)       // (A & C)|(A & D) == A & (C|D)
-        V1 = A, V2 = C, V3 = D;
-      else if (A == D)  // (A & C)|(B & A) == A & (B|C)
-        V1 = A, V2 = B, V3 = C;
-      else if (C == B)  // (A & C)|(C & D) == C & (A|D)
-        V1 = C, V2 = A, V3 = D;
-      else if (C == D)  // (A & C)|(B & C) == C & (A|B)
-        V1 = C, V2 = A, V3 = B;
-
-      if (V1) {
-        Value *Or = Builder->CreateOr(V2, V3, "tmp");
-        return BinaryOperator::CreateAnd(V1, Or);
-      }
-    }
-
-    // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ?
A : B, and commuted variants - if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context)) - return Match; - - // ((A&~B)|(~A&B)) -> A^B - if ((match(C, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, D); - // ((~B&A)|(~A&B)) -> A^B - if ((match(A, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, D); - // ((A&~B)|(B&~A)) -> A^B - if ((match(C, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, B); - // ((~B&A)|(B&~A)) -> A^B - if ((match(A, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, B); - } - - // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. - if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { - if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && - SI0->getOperand(1) == SI1->getOperand(1) && - (SI0->hasOneUse() || SI1->hasOneUse())) { - Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), - SI0->getName()); - return BinaryOperator::Create(SI1->getOpcode(), NewOp, - SI1->getOperand(1)); - } - } - - // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); - if (Ret) return Ret; - } - // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); - if (Ret) return Ret; - } - - // (~A | ~B) == (~(A & B)) - De Morgan's Law - if (Value *Op0NotVal = dyn_castNotVal(Op0)) - if (Value *Op1NotVal = dyn_castNotVal(Op1)) - if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(And); - } - - // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) - if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) - return R; - - if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) - if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) - return Res; - } - - // fold (or (cast A), (cast B)) -> (cast (or A, B)) - if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa<ICmpInst>(Op0C->getOperand(0)) || - !isa<ICmpInst>(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be - // generated. 
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType(), TD) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType(), TD)) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); - return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); - } - } - } - } - - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - - return Changed ? &I : 0; -} - -namespace { - -// XorSelf - Implements: X ^ X --> 0 -struct XorSelf { - Value *RHS; - XorSelf(Value *rhs) : RHS(rhs) {} - bool shouldApply(Value *LHS) const { return LHS == RHS; } - Instruction *apply(BinaryOperator &Xor) const { - return &Xor; - } -}; - -} - -Instruction *InstCombiner::visitXor(BinaryOperator &I) { - bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa<UndefValue>(Op1)) { - if (isa<UndefValue>(Op0)) - // Handle undef ^ undef -> 0 special case. This is a common - // idiom (misuse). - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef - } - - // xor X, X = 0, even if X is nested in a sequence of Xor's. - if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { - assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - if (SimplifyDemandedInstructionBits(I)) - return &I; - if (isa<VectorType>(I.getType())) - if (isa<ConstantAggregateZero>(Op1)) - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X - - // Is this a ~ operation? 
- if (Value *NotOp = dyn_castNotVal(&I)) { - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { - if (Op0I->getOpcode() == Instruction::And || - Op0I->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(Op0I->getOperand(1))) - Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (isFreeToInvert(Op0I->getOperand(0)) && - isFreeToInvert(Op0I->getOperand(1))) { - Value *NotX = - Builder->CreateNot(Op0I->getOperand(0), "notlhs"); - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), "notrhs"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(NotX, NotY); - return BinaryOperator::CreateAnd(NotX, NotY); - } - } - } - } - - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - if (RHS->isOne() && Op0->hasOneUse()) { - // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B - if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) - return new ICmpInst(ICI->getInversePredicate(), - ICI->getOperand(0), ICI->getOperand(1)); - - if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) - return new FCmpInst(FCI->getInversePredicate(), - FCI->getOperand(0), FCI->getOperand(1)); - } - - // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). - if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { - if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) { - if (CI->hasOneUse() && Op0C->hasOneUse()) { - Instruction::CastOps Opcode = Op0C->getOpcode(); - if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHS == ConstantExpr::getCast(Opcode, - ConstantInt::getTrue(*Context), - Op0C->getDestTy()))) { - CI->setPredicate(CI->getInversePredicate()); - return CastInst::Create(Opcode, CI, Op0C->getType()); - } - } - } - } - - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - // ~(c-X) == X-c-1 == X+(-c-1) - if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) - if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { - Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); - Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, - ConstantInt::get(I.getType(), 1)); - return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); - } - - if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - if (Op0I->getOpcode() == Instruction::Add) { - // ~(X-c) --> (-c-1)-X - if (RHS->isAllOnesValue()) { - Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); - return BinaryOperator::CreateSub( - ConstantExpr::getSub(NegOp0CI, - ConstantInt::get(I.getType(), 1)), - Op0I->getOperand(0)); - } else if (RHS->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) - Constant *C = ConstantInt::get(*Context, - RHS->getValue() + Op0CI->getValue()); - return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); - - } - } else if (Op0I->getOpcode() == Instruction::Or) { - // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 - if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { - Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); - // Anything in both C1 and C2 is known to be zero, remove it from - // NewRHS. 
- Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); - NewRHS = ConstantExpr::getAnd(NewRHS, - ConstantExpr::getNot(CommonBits)); - Worklist.Add(Op0I); - I.setOperand(0, Op0I->getOperand(0)); - I.setOperand(1, NewRHS); - return &I; - } - } - } - } - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } - - if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 - if (X == Op1) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 - if (X == Op0) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - - BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); - if (Op1I) { - Value *A, *B; - if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) { - if (A == Op0) { // B^(B|A) == (A|B)^B - Op1I->swapOperands(); - I.swapOperands(); - std::swap(Op0, Op1); - } else if (B == Op0) { // B^(A|B) == (A|B)^B - I.swapOperands(); // Simplified below. - std::swap(Op0, Op1); - } - } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // A^(A^B) == B - } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { - return ReplaceInstUsesWith(I, A); // A^(B^A) == B - } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && - Op1I->hasOneUse()){ - if (A == Op0) { // A^(A&B) -> A^(B&A) - Op1I->swapOperands(); - std::swap(A, B); - } - if (B == Op0) { // A^(B&A) -> (B&A)^A - I.swapOperands(); // Simplified below. - std::swap(Op0, Op1); - } - } - } - - BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); - if (Op0I) { - Value *A, *B; - if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && - Op0I->hasOneUse()) { - if (A == Op1) // (B|A)^B == (A|B)^B - std::swap(A, B); - if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); - } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { - return ReplaceInstUsesWith(I, B); // (A^B)^A == B - } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { - return ReplaceInstUsesWith(I, A); // (B^A)^A == B - } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && - Op0I->hasOneUse()){ - if (A == Op1) // (A&B)^A -> (B&A)^A - std::swap(A, B); - if (B == Op1 && // (B&A)^A == ~B & A - !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); - } - } - } - - // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. 
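
Several of the visitXor folds above reduce to the identity (A | B) ^ B == A & ~B: the xor clears exactly the bits that B contributed to the or, and leaves A's remaining bits alone. A short standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A != 256; ++A)
        for (uint32_t B = 0; B != 256; ++B)
          assert(((A | B) ^ B) == (A & ~B));
      return 0;
    }
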
-  if (Op0I && Op1I && Op0I->isShift() &&
-      Op0I->getOpcode() == Op1I->getOpcode() &&
-      Op0I->getOperand(1) == Op1I->getOperand(1) &&
-      (Op0I->hasOneUse() || Op1I->hasOneUse())) {
-    Value *NewOp =
-      Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
-                         Op0I->getName());
-    return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
-                                  Op1I->getOperand(1));
-  }
-
-  if (Op0I && Op1I) {
-    Value *A, *B, *C, *D;
-    // (A & B)^(A | B) -> A ^ B
-    if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
-        match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
-      if ((A == C && B == D) || (A == D && B == C))
-        return BinaryOperator::CreateXor(A, B);
-    }
-    // (A | B)^(A & B) -> A ^ B
-    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
-        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
-      if ((A == C && B == D) || (A == D && B == C))
-        return BinaryOperator::CreateXor(A, B);
-    }
-
-    // (A & B)^(C & D)
-    if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
-        match(Op0I, m_And(m_Value(A), m_Value(B))) &&
-        match(Op1I, m_And(m_Value(C), m_Value(D)))) {
-      // (X & Y)^(X & Z) -> (Y^Z) & X
-      Value *X = 0, *Y = 0, *Z = 0;
-      if (A == C)
-        X = A, Y = B, Z = D;
-      else if (A == D)
-        X = A, Y = B, Z = C;
-      else if (B == C)
-        X = B, Y = A, Z = D;
-      else if (B == D)
-        X = B, Y = A, Z = C;
-
-      if (X) {
-        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
-        return BinaryOperator::CreateAnd(NewOp, X);
-      }
-    }
-  }
-
-  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
-  if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
-    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))
-      return R;
-
-  // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
-  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
-    if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
-      if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
-        const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
-            // Only do this if the casts both really cause code to be generated.
-            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
-                              I.getType(), TD) &&
-            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
-                              I.getType(), TD)) {
-          Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
-                                            Op1C->getOperand(0), I.getName());
-          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
-        }
-      }
-  }
-
-  return Changed ? &I : 0;
-}
-
-static ConstantInt *ExtractElement(Constant *V, Constant *Idx,
-                                   LLVMContext *Context) {
-  return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
-}
-
-static bool HasAddOverflow(ConstantInt *Result,
-                           ConstantInt *In1, ConstantInt *In2,
-                           bool IsSigned) {
-  if (IsSigned)
-    if (In2->getValue().isNegative())
-      return Result->getValue().sgt(In1->getValue());
-    else
-      return Result->getValue().slt(In1->getValue());
-  else
-    return Result->getValue().ult(In1->getValue());
-}
-
-/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
-/// overflowed for this type.
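
HasAddOverflow above detects wraparound purely by comparing the result against the first operand. A hedged fixed-width sketch of the same logic, with names invented for the illustration (the AddWithOverflow definition the doc comment above announces follows in the next hunk):

    #include <cassert>
    #include <cstdint>

    // Unsigned add overflowed iff the result wrapped below the first operand.
    static bool unsignedAddOverflow(uint32_t R, uint32_t In1) { return R < In1; }

    // Signed add overflowed iff the result moved the "wrong way" relative to
    // the sign of the second operand, mirroring HasAddOverflow above.
    static bool signedAddOverflow(int32_t R, int32_t In1, int32_t In2) {
      return In2 < 0 ? R > In1 : R < In1;
    }

    int main() {
      assert(unsignedAddOverflow(0xFFFFFFFFu + 2u, 0xFFFFFFFFu)); // wraps to 1
      assert(!unsignedAddOverflow(7u + 2u, 7u));
      // INT32_MAX + 1, computed in uint32_t to avoid UB, then reinterpreted.
      assert(signedAddOverflow((int32_t)(0x7FFFFFFFu + 1u), 0x7FFFFFFF, 1));
      assert(!signedAddOverflow(-5 + 3, -5, 3));
      return 0;
    }
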
-static bool AddWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext *Context, - bool IsSigned = false) { - Result = ConstantExpr::getAdd(In1, In2); - - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); - if (HasAddOverflow(ExtractElement(Result, Idx, Context), - ExtractElement(In1, Idx, Context), - ExtractElement(In2, Idx, Context), - IsSigned)) - return true; - } - return false; - } - - return HasAddOverflow(cast<ConstantInt>(Result), - cast<ConstantInt>(In1), cast<ConstantInt>(In2), - IsSigned); -} - -static bool HasSubOverflow(ConstantInt *Result, - ConstantInt *In1, ConstantInt *In2, - bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().ugt(In1->getValue()); -} - -/// SubWithOverflow - Compute Result = In1-In2, returning true if the result -/// overflowed for this type. -static bool SubWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext *Context, - bool IsSigned = false) { - Result = ConstantExpr::getSub(In1, In2); - - if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); - if (HasSubOverflow(ExtractElement(Result, Idx, Context), - ExtractElement(In1, Idx, Context), - ExtractElement(In2, Idx, Context), - IsSigned)) - return true; - } - return false; - } - - return HasSubOverflow(cast<ConstantInt>(Result), - cast<ConstantInt>(In1), cast<ConstantInt>(In2), - IsSigned); -} - - -/// FoldGEPICmp - Fold comparisons between a GEP instruction and something -/// else. At this point we know that the GEP is on the LHS of the comparison. -Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, - Instruction &I) { - // Look through bitcasts. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) - RHS = BCI->getOperand(0); - - Value *PtrBase = GEPLHS->getOperand(0); - if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { - // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). - // This transformation (ignoring the base and scales) is valid because we - // know pointers can't overflow since the gep is inbounds. See if we can - // output an optimized form. - Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); - - // If not, synthesize the offset the hard way. - if (Offset == 0) - Offset = EmitGEPOffset(GEPLHS, *this); - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, - Constant::getNullValue(Offset->getType())); - } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) { - // If the base pointers are different, but the indices are the same, just - // compare the base pointer. - if (PtrBase != GEPRHS->getOperand(0)) { - bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands(); - IndicesTheSame &= GEPLHS->getOperand(0)->getType() == - GEPRHS->getOperand(0)->getType(); - if (IndicesTheSame) - for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) - if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { - IndicesTheSame = false; - break; - } - - // If all indices are the same, just compare the base pointers. 
-    if (IndicesTheSame)
-      return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
-                          GEPLHS->getOperand(0), GEPRHS->getOperand(0));
-
-    // Otherwise, the base pointers are different and the indices are
-    // different, bail out.
-    return 0;
-  }
-
-  // If one of the GEPs has all zero indices, recurse.
-  bool AllZeros = true;
-  for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
-    if (!isa<Constant>(GEPLHS->getOperand(i)) ||
-        !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
-      AllZeros = false;
-      break;
-    }
-  if (AllZeros)
-    return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
-                       ICmpInst::getSwappedPredicate(Cond), I);
-
-  // If the other GEP has all zero indices, recurse.
-  AllZeros = true;
-  for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
-    if (!isa<Constant>(GEPRHS->getOperand(i)) ||
-        !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
-      AllZeros = false;
-      break;
-    }
-  if (AllZeros)
-    return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
-
-  if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
-    // If the GEPs only differ by one index, compare it.
-    unsigned NumDifferences = 0;  // Keep track of # differences.
-    unsigned DiffOperand = 0;     // The operand that differs.
-    for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
-      if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
-        if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
-            GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
-          // Irreconcilable differences.
-          NumDifferences = 2;
-          break;
-        } else {
-          if (NumDifferences++) break;
-          DiffOperand = i;
-        }
-      }
-
-    if (NumDifferences == 0)   // SAME GEP?
-      return ReplaceInstUsesWith(I, // No comparison is needed here.
-                                 ConstantInt::get(Type::getInt1Ty(*Context),
-                                             ICmpInst::isTrueWhenEqual(Cond)));
-
-    else if (NumDifferences == 1) {
-      Value *LHSV = GEPLHS->getOperand(DiffOperand);
-      Value *RHSV = GEPRHS->getOperand(DiffOperand);
-      // Make sure we do a signed comparison here.
-      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
-    }
-  }
-
-  // Only lower this if the icmp is the only user of the GEP or if we expect
-  // the result to fold to a constant!
-  if (TD &&
-      (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
-      (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
-    // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)) ---> (OFFSET1 cmp OFFSET2)
-    Value *L = EmitGEPOffset(GEPLHS, *this);
-    Value *R = EmitGEPOffset(GEPRHS, *this);
-    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
-  }
-  return 0;
-}
-
-/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
-///
-Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
-                                                Instruction *LHSI,
-                                                Constant *RHSC) {
-  if (!isa<ConstantFP>(RHSC)) return 0;
-  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
-
-  // Get the width of the mantissa.  We don't want to hack on conversions that
-  // might lose information from the integer, e.g. "i64 -> float"
-  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
-  if (MantissaWidth == -1) return 0;  // Unknown.
-
-  // Check to see that the input is converted from an integer type that is
-  // small enough that the conversion preserves all bits.  TODO: check here
-  // for "known" sign bits.
-  // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
-  unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
-
-  // If this is a uitofp instruction, we need an extra bit to hold the sign.
- bool LHSUnsigned = isa<UIToFPInst>(LHSI); - if (LHSUnsigned) - ++InputSize; - - // If the conversion would lose info, don't hack on this. - if ((int)InputSize > MantissaWidth) - return 0; - - // Otherwise, we can potentially simplify the comparison. We know that it - // will always come through as an integer value and we know the constant is - // not a NAN (it would have been previously simplified). - assert(!RHS.isNaN() && "NaN comparison not already folded!"); - - ICmpInst::Predicate Pred; - switch (I.getPredicate()) { - default: llvm_unreachable("Unexpected predicate!"); - case FCmpInst::FCMP_UEQ: - case FCmpInst::FCMP_OEQ: - Pred = ICmpInst::ICMP_EQ; - break; - case FCmpInst::FCMP_UGT: - case FCmpInst::FCMP_OGT: - Pred = LHSUnsigned ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_SGT; - break; - case FCmpInst::FCMP_UGE: - case FCmpInst::FCMP_OGE: - Pred = LHSUnsigned ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_SGE; - break; - case FCmpInst::FCMP_ULT: - case FCmpInst::FCMP_OLT: - Pred = LHSUnsigned ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_SLT; - break; - case FCmpInst::FCMP_ULE: - case FCmpInst::FCMP_OLE: - Pred = LHSUnsigned ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_SLE; - break; - case FCmpInst::FCMP_UNE: - case FCmpInst::FCMP_ONE: - Pred = ICmpInst::ICMP_NE; - break; - case FCmpInst::FCMP_ORD: - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case FCmpInst::FCMP_UNO: - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - - const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); - - // Now we know that the APFloat is a normal number, zero or inf. - - // See if the FP constant is too large for the integer. For example, - // comparing an i8 to 300.0. - unsigned IntWidth = IntTy->getScalarSizeInBits(); - - if (!LHSUnsigned) { - // If the RHS value is > SignedMax, fold the comparison. This handles +INF - // and large values. - APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false); - SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true, - APFloat::rmNearestTiesToEven); - if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || - Pred == ICmpInst::ICMP_SLE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - } else { - // If the RHS value is > UnsignedMax, fold the comparison. This handles - // +INF and large values. - APFloat UMax(RHS.getSemantics(), APFloat::fcZero, false); - UMax.convertFromAPInt(APInt::getMaxValue(IntWidth), false, - APFloat::rmNearestTiesToEven); - if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || - Pred == ICmpInst::ICMP_ULE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - } - - if (!LHSUnsigned) { - // See if the RHS value is < SignedMin. 
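The InputSize/MantissaWidth guard above is what keeps this fold sound: it may only fire when every value of the source integer type converts to the FP type exactly. With IEEE single precision (24-bit significand) every i16 round-trips, but i32 does not; a standalone illustration, assuming IEEE float:

  #include <cassert>
  #include <cfloat>
  #include <cstdint>

  int main() {
    static_assert(FLT_MANT_DIG == 24, "IEEE single precision assumed");
    // Every int16_t survives the round trip through float...
    for (int32_t i = INT16_MIN; i <= INT16_MAX; ++i)
      assert((int32_t)(float)(int16_t)i == i);
    // ...but not every int32_t: 2^24 + 1 rounds to 2^24.
    assert((int32_t)(float)16777217 != 16777217);
    return 0;
  }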
- APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); - SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true, - APFloat::rmNearestTiesToEven); - if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 - if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || - Pred == ICmpInst::ICMP_SGE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - } - } - - // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or - // [0, UMAX], but it may still be fractional. See if it is fractional by - // casting the FP value to the integer value and back, checking for equality. - // Don't do this for zero, because -0.0 is not fractional. - Constant *RHSInt = LHSUnsigned - ? ConstantExpr::getFPToUI(RHSC, IntTy) - : ConstantExpr::getFPToSI(RHSC, IntTy); - if (!RHS.isZero()) { - bool Equal = LHSUnsigned - ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC - : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; - if (!Equal) { - // If we had a comparison against a fractional value, we have to adjust - // the compare predicate and sometimes the value. RHSC is rounded towards - // zero at this point. - switch (Pred) { - default: llvm_unreachable("Unexpected integer comparison!"); - case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - case ICmpInst::ICMP_ULE: - // (float)int <= 4.4 --> int <= 4 - // (float)int <= -4.4 --> false - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_SLE: - // (float)int <= 4.4 --> int <= 4 - // (float)int <= -4.4 --> int < -4 - if (RHS.isNegative()) - Pred = ICmpInst::ICMP_SLT; - break; - case ICmpInst::ICMP_ULT: - // (float)int < -4.4 --> false - // (float)int < 4.4 --> int <= 4 - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - Pred = ICmpInst::ICMP_ULE; - break; - case ICmpInst::ICMP_SLT: - // (float)int < -4.4 --> int < -4 - // (float)int < 4.4 --> int <= 4 - if (!RHS.isNegative()) - Pred = ICmpInst::ICMP_SLE; - break; - case ICmpInst::ICMP_UGT: - // (float)int > 4.4 --> int > 4 - // (float)int > -4.4 --> true - if (RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - break; - case ICmpInst::ICMP_SGT: - // (float)int > 4.4 --> int > 4 - // (float)int > -4.4 --> int >= -4 - if (RHS.isNegative()) - Pred = ICmpInst::ICMP_SGE; - break; - case ICmpInst::ICMP_UGE: - // (float)int >= -4.4 --> true - // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - Pred = ICmpInst::ICMP_UGT; - break; - case ICmpInst::ICMP_SGE: - // (float)int >= -4.4 --> int >= -4 - // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) - Pred = ICmpInst::ICMP_SGT; - break; - } - } - } - - // Lower this FP comparison into an appropriate integer version of the - // comparison. - return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt); -} - -Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { - bool Changed = false; - - /// Orders the operands of the compare so that they are listed from most - /// complex to least complex. This puts constants before unary operators, - /// before binary operators. 
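The fractional-constant adjustments above can be spot-checked in plain C++ with i8 and the 4.4 constant used in the comments (standalone, assumes IEEE float; the FP constant is rounded toward zero, so SLT against 4.4 becomes SLE against 4):

  #include <cassert>

  int main() {
    for (int x = -128; x <= 127; ++x) {
      assert(((float)x <  4.4f) == (x <= 4));   // SLT -> SLE on trunc(4.4)
      assert(((float)x > -4.4f) == (x >= -4));  // SGT -> SGE on trunc(-4.4)
      assert(!((float)x == 4.4f));              // OEQ vs. fractional -> false
    }
    return 0;
  }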
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { - I.swapOperands(); - Changed = true; - } - - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) - return ReplaceInstUsesWith(I, V); - - // Simplify 'fcmp pred X, X' - if (Op0 == Op1) { - switch (I.getPredicate()) { - default: llvm_unreachable("Unknown predicate!"); - case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) - case FCmpInst::FCMP_ULT: // True if unordered or less than - case FCmpInst::FCMP_UGT: // True if unordered or greater than - case FCmpInst::FCMP_UNE: // True if unordered or not equal - // Canonicalize these to be 'fcmp uno %X, 0.0'. - I.setPredicate(FCmpInst::FCMP_UNO); - I.setOperand(1, Constant::getNullValue(Op0->getType())); - return &I; - - case FCmpInst::FCMP_ORD: // True if ordered (no nans) - case FCmpInst::FCMP_OEQ: // True if ordered and equal - case FCmpInst::FCMP_OGE: // True if ordered and greater than or equal - case FCmpInst::FCMP_OLE: // True if ordered and less than or equal - // Canonicalize these to be 'fcmp ord %X, 0.0'. - I.setPredicate(FCmpInst::FCMP_ORD); - I.setOperand(1, Constant::getNullValue(Op0->getType())); - return &I; - } - } - - // Handle fcmp with constant RHS - if (Constant *RHSC = dyn_cast<Constant>(Op1)) { - if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::PHI: - // Only fold fcmp into the PHI if the phi and fcmp are in the same - // block. If in the same block, we're encouraging jump threading. If - // not, we are just pessimizing the code by making an i1 phi. - if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) - return NV; - break; - case Instruction::SIToFP: - case Instruction::UIToFP: - if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC)) - return NV; - break; - case Instruction::Select: - // If either operand of the select is a constant, we can fold the - // comparison into the select arms, which will cause one to be - // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; - if (LHSI->hasOneUse()) { - if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { - // Fold the known value into the constant operand. - Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); - // Insert a new FCmp of the other select operand. - Op2 = Builder->CreateFCmp(I.getPredicate(), - LHSI->getOperand(2), RHSC, I.getName()); - } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { - // Fold the known value into the constant operand. - Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); - // Insert a new FCmp of the other select operand. - Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), - RHSC, I.getName()); - } - } - - if (Op1) - return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); - break; - } - } - - return Changed ? &I : 0; -} - -Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { - bool Changed = false; - - /// Orders the operands of the compare so that they are listed from most - /// complex to least complex. This puts constants before unary operators, - /// before binary operators. 
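The 'fcmp X, X' canonicalization above leans on the IEEE rule that only NaN compares unordered with itself: in C terms, x != x is exactly isnan(x), which is what 'fcmp uno %X, 0.0' tests. A standalone check (assumes IEEE semantics, no -ffast-math):

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    float vals[] = {0.0f, -1.5f,
                    std::numeric_limits<float>::infinity(),
                    std::numeric_limits<float>::quiet_NaN()};
    for (float x : vals)
      assert((x != x) == std::isnan(x));
    return 0;
  }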
-  if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
-    I.swapOperands();
-    Changed = true;
-  }
-
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
-    return ReplaceInstUsesWith(I, V);
-
-  const Type *Ty = Op0->getType();
-
-  // icmp's with boolean values can always be turned into bitwise operations
-  if (Ty == Type::getInt1Ty(*Context)) {
-    switch (I.getPredicate()) {
-    default: llvm_unreachable("Invalid icmp instruction!");
-    case ICmpInst::ICMP_EQ: {               // icmp eq i1 A, B -> ~(A^B)
-      Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");
-      return BinaryOperator::CreateNot(Xor);
-    }
-    case ICmpInst::ICMP_NE:                 // icmp ne i1 A, B -> A^B
-      return BinaryOperator::CreateXor(Op0, Op1);
-
-    case ICmpInst::ICMP_UGT:
-      std::swap(Op0, Op1);                  // Change icmp ugt -> icmp ult
-      // FALL THROUGH
-    case ICmpInst::ICMP_ULT:{               // icmp ult i1 A, B -> ~A & B
-      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
-      return BinaryOperator::CreateAnd(Not, Op1);
-    }
-    case ICmpInst::ICMP_SGT:
-      std::swap(Op0, Op1);                  // Change icmp sgt -> icmp slt
-      // FALL THROUGH
-    case ICmpInst::ICMP_SLT: {              // icmp slt i1 A, B -> A & ~B
-      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
-      return BinaryOperator::CreateAnd(Not, Op0);
-    }
-    case ICmpInst::ICMP_UGE:
-      std::swap(Op0, Op1);                  // Change icmp uge -> icmp ule
-      // FALL THROUGH
-    case ICmpInst::ICMP_ULE: {              // icmp ule i1 A, B -> ~A | B
-      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");
-      return BinaryOperator::CreateOr(Not, Op1);
-    }
-    case ICmpInst::ICMP_SGE:
-      std::swap(Op0, Op1);                  // Change icmp sge -> icmp sle
-      // FALL THROUGH
-    case ICmpInst::ICMP_SLE: {              // icmp sle i1 A, B -> A | ~B
-      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");
-      return BinaryOperator::CreateOr(Not, Op0);
-    }
-    }
-  }
-
-  unsigned BitWidth = 0;
-  if (TD)
-    BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
-  else if (Ty->isIntOrIntVector())
-    BitWidth = Ty->getScalarSizeInBits();
-
-  bool isSignBit = false;
-
-  // See if we are doing a comparison with a constant.
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-    Value *A = 0, *B = 0;
-
-    // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
-    if (I.isEquality() && CI->isNullValue() &&
-        match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
-      // (icmp cond A B) if cond is equality
-      return new ICmpInst(I.getPredicate(), A, B);
-    }
-
-    // If we have an icmp le or icmp ge instruction, turn it into the
-    // appropriate icmp lt or icmp gt instruction.  This allows us to rely on
-    // them being folded in the code below.  The SimplifyICmpInst code has
-    // already handled the edge cases for us, so we just assert on them.
-    switch (I.getPredicate()) {
-    default: break;
-    case ICmpInst::ICMP_ULE:
-      assert(!CI->isMaxValue(false));                 // A <=u MAX -> TRUE
-      return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
-                          AddOne(CI));
-    case ICmpInst::ICMP_SLE:
-      assert(!CI->isMaxValue(true));                  // A <=s MAX -> TRUE
-      return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
-                          AddOne(CI));
-    case ICmpInst::ICMP_UGE:
-      assert(!CI->isMinValue(false));                 // A >=u MIN -> TRUE
-      return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
-                          SubOne(CI));
-    case ICmpInst::ICMP_SGE:
-      assert(!CI->isMinValue(true));                  // A >=s MIN -> TRUE
-      return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
-                          SubOne(CI));
-    }
-
-    // If this comparison is a normal comparison, it demands all
-    // bits, if it is a sign bit comparison, it only demands the sign bit.
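The i1 rewrites above follow from an i1 being 0 or 1 as an unsigned value but 0 or -1 as a signed one, which is why ult becomes ~A & B while slt becomes A & ~B. A standalone truth-table check:

  #include <cassert>

  int main() {
    for (int A = 0; A <= 1; ++A)
      for (int B = 0; B <= 1; ++B) {
        int sA = A ? -1 : 0, sB = B ? -1 : 0;  // signed reading of an i1
        assert((A == B)   == !(A ^ B));   // eq  -> ~(A^B)
        assert((A != B)   ==  (A ^ B));   // ne  -> A^B
        assert((A <  B)   == (!A & B));   // ult -> ~A & B
        assert((sA < sB)  ==  (A & !B));  // slt -> A & ~B
        assert((A <= B)   == (!A | B));   // ule -> ~A | B
        assert((sA <= sB) ==  (A | !B));  // sle -> A | ~B
      }
    return 0;
  }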
- bool UnusedBit; - isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit); - } - - // See if we can fold the comparison based on range information we can get - // by checking whether bits are known to be zero or one in the input. - if (BitWidth != 0) { - APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); - APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); - - if (SimplifyDemandedBits(I.getOperandUse(0), - isSignBit ? APInt::getSignBit(BitWidth) - : APInt::getAllOnesValue(BitWidth), - Op0KnownZero, Op0KnownOne, 0)) - return &I; - if (SimplifyDemandedBits(I.getOperandUse(1), - APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) - return &I; - - // Given the known and unknown bits, compute a range that the LHS could be - // in. Compute the Min, Max and RHS values based on the known bits. For the - // EQ and NE we use unsigned values. - APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); - APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); - if (I.isSigned()) { - ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, - Op0Min, Op0Max); - ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, - Op1Min, Op1Max); - } else { - ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, - Op0Min, Op0Max); - ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, - Op1Min, Op1Max); - } - - // If Min and Max are known to be the same, then SimplifyDemandedBits - // figured out that the LHS is a constant. Just constant fold this now so - // that code below can assume that Min != Max. - if (!isa<Constant>(Op0) && Op0Min == Op0Max) - return new ICmpInst(I.getPredicate(), - ConstantInt::get(*Context, Op0Min), Op1); - if (!isa<Constant>(Op1) && Op1Min == Op1Max) - return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(*Context, Op1Min)); - - // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. 
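ComputeUnsignedMinMaxValuesFromKnownBits is not shown in this patch; presumably the minimum sets only the known-one bits and the maximum additionally sets every unknown bit. A sketch of that reading, plus the "(x&4) < 8" example from the comment above (the masks are hypothetical, uint8_t stands in for APInt):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Value known to look like 0b??????10: bit 1 one, bit 0 zero.
    uint8_t KnownOne = 0x02, KnownZero = 0x01;
    uint8_t Min = KnownOne;             // unknown bits -> 0
    uint8_t Max = (uint8_t)~KnownZero;  // unknown bits -> 1, here 0xFE
    for (unsigned v = 0; v < 256; ++v)
      if ((v & KnownOne) == KnownOne && (v & KnownZero) == 0)
        assert(Min <= v && v <= Max);
    // The comment's example: masking with 4 caps the range at 4, so
    // (x & 4) <u 8 is always true.
    for (unsigned x = 0; x < 256; ++x)
      assert((x & 4u) < 8u);
    return 0;
  }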
-    switch (I.getPredicate()) {
-    default: llvm_unreachable("Unknown icmp opcode!");
-    case ICmpInst::ICMP_EQ:
-      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-      break;
-    case ICmpInst::ICMP_NE:
-      if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      break;
-    case ICmpInst::ICMP_ULT:
-      if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-      if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)
-        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-        if (Op1Max == Op0Min+1)        // A <u C -> A == C-1 if min(A)+1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                              SubOne(CI));
-
-        // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear
-        if (CI->isMinValue(true))
-          return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
-                              Constant::getAllOnesValue(Op0->getType()));
-      }
-      break;
-    case ICmpInst::ICMP_UGT:
-      if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-
-      if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)
-        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-        if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(A)-1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                              AddOne(CI));
-
-        // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set
-        if (CI->isMaxValue(true))
-          return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
-                              Constant::getNullValue(Op0->getType()));
-      }
-      break;
-    case ICmpInst::ICMP_SLT:
-      if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-      if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)
-        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-        if (Op1Max == Op0Min+1)        // A <s C -> A == C-1 if min(A)+1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                              SubOne(CI));
-      }
-      break;
-    case ICmpInst::ICMP_SGT:
-      if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-
-      if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)
-        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-        if (Op1Min == Op0Max-1)        // A >s C -> A == C+1 if max(A)-1 == C
-          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                              AddOne(CI));
-      }
-      break;
-    case ICmpInst::ICMP_SGE:
-      assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
-      if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
-      if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B)
-        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
-      break;
-    case ICmpInst::ICMP_SLE:
-
assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); - if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_UGE: - assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); - if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - case ICmpInst::ICMP_ULE: - assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); - if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - break; - } - - // Turn a signed comparison into an unsigned one if both operands - // are known to have the same sign. - if (I.isSigned() && - ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || - (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) - return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); - } - - // Test if the ICmpInst instruction is used exclusively by a select as - // part of a minimum or maximum operation. If so, refrain from doing - // any other folding. This helps out other analyses which understand - // non-obfuscated minimum and maximum idioms, such as ScalarEvolution - // and CodeGen. And in this case, at least one of the comparison - // operands has at least one user besides the compare (the select), - // which would often largely negate the benefit of folding anyway. - if (I.hasOneUse()) - if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin())) - if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || - (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) - return 0; - - // See if we are doing a comparison between a constant and an instruction that - // can be folded into the comparison. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - // Since the RHS is a ConstantInt (CI), if the left hand side is an - // instruction, see if that instruction also has constants so that the - // instruction can be folded into the icmp - if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) - if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI)) - return Res; - } - - // Handle icmp with constant (but not simple integer constant) RHS - if (Constant *RHSC = dyn_cast<Constant>(Op1)) { - if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) - switch (LHSI->getOpcode()) { - case Instruction::GetElementPtr: - if (RHSC->isNullValue()) { - // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null - bool isAllZeros = true; - for (unsigned i = 1, e = LHSI->getNumOperands(); i != e; ++i) - if (!isa<Constant>(LHSI->getOperand(i)) || - !cast<Constant>(LHSI->getOperand(i))->isNullValue()) { - isAllZeros = false; - break; - } - if (isAllZeros) - return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), - Constant::getNullValue(LHSI->getOperand(0)->getType())); - } - break; - - case Instruction::PHI: - // Only fold icmp into the PHI if the phi and icmp are in the same - // block. If in the same block, we're encouraging jump threading. 
If - // not, we are just pessimizing the code by making an i1 phi. - if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I, true)) - return NV; - break; - case Instruction::Select: { - // If either operand of the select is a constant, we can fold the - // comparison into the select arms, which will cause one to be - // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; - if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) - Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) - Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - - // We only want to perform this transformation if it will not lead to - // additional code. This is true if either both sides of the select - // fold to a constant (in which case the icmp is replaced with a select - // which will usually simplify) or this is the only user of the - // select (in which case we are trading a select+icmp for a simpler - // select+icmp). - if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { - if (!Op1) - Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), - RHSC, I.getName()); - if (!Op2) - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), - RHSC, I.getName()); - return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); - } - break; - } - case Instruction::Call: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (isMalloc(LHSI) && LHSI->hasOneUse() && - isa<ConstantPointerNull>(RHSC)) { - // Need to explicitly erase malloc call here, instead of adding it to - // Worklist, because it won't get DCE'd from the Worklist since - // isInstructionTriviallyDead() returns false for function calls. - // It is OK to replace LHSI/MallocCall with Undef because the - // instruction that uses it will be erased via Worklist. - if (extractMallocCall(LHSI)) { - LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); - EraseInstFromFunction(*LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); - } - if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) - if (MallocCall->hasOneUse()) { - MallocCall->replaceAllUsesWith( - UndefValue::get(MallocCall->getType())); - EraseInstFromFunction(*MallocCall); - Worklist.Add(LHSI); // The malloc's bitcast use. - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); - } - } - break; - } - } - - // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. - if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) - if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) - return NI; - if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) - if (Instruction *NI = FoldGEPICmp(GEP, Op0, - ICmpInst::getSwappedPredicate(I.getPredicate()), I)) - return NI; - - // Test to see if the operands of the icmp are casted versions of other - // values. If the ptr->ptr cast can be stripped off both arguments, we do so - // now. - if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { - if (isa<PointerType>(Op0->getType()) && - (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { - // We keep moving the cast from the left operand over to the right - // operand, where it can often be eliminated completely. - Op0 = CI->getOperand(0); - - // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast - // so eliminate it as well. 
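The select fold earlier in this switch has a simple C analogue: when one select arm is a constant, the compare distributes into the arms, one side constant-folds, and the select degenerates to an or. For instance, (c ? 3 : x) == 3 is just c || x == 3 (standalone check, not part of this patch):

  #include <cassert>

  int main() {
    for (int c = 0; c <= 1; ++c)
      for (int x = -5; x <= 5; ++x)
        assert(((c ? 3 : x) == 3) == (c || x == 3));
    return 0;
  }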
- if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1)) - Op1 = CI2->getOperand(0); - - // If Op1 is a constant, we can fold the cast into the constant. - if (Op0->getType() != Op1->getType()) { - if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); - } else { - // Otherwise, cast the RHS right before the icmp - Op1 = Builder->CreateBitCast(Op1, Op0->getType()); - } - } - return new ICmpInst(I.getPredicate(), Op0, Op1); - } - } - - if (isa<CastInst>(Op0)) { - // Handle the special case of: icmp (cast bool to X), <cst> - // This comes up when you have code like - // int X = A < B; - // if (X) ... - // For generality, we handle any zero-extension of any operand comparison - // with a constant or another cast from the same type. - if (isa<Constant>(Op1) || isa<CastInst>(Op1)) - if (Instruction *R = visitICmpInstWithCastAndCast(I)) - return R; - } - - // See if it's the same type of instruction on the left and right. - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() && - Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) { - switch (Op0I->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Xor: - if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), Op0I->getOperand(0), - Op1I->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - - if (CI->getValue().isMaxSignedValue()) { - ICmpInst::Predicate Pred = I.isSigned() - ? I.getUnsignedPredicate() - : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, Op0I->getOperand(0), - Op1I->getOperand(0)); - } - } - break; - case Instruction::Mul: - if (!I.isEquality()) - break; - - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get(*Context, - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - - AP.countTrailingZeros())); - Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); - } - } - break; - } - } - } - } - - // ~x < ~y --> y < x - { Value *A, *B; - if (match(Op0, m_Not(m_Value(A))) && - match(Op1, m_Not(m_Value(B)))) - return new ICmpInst(I.getPredicate(), B, A); - } - - if (I.isEquality()) { - Value *A, *B, *C, *D; - - // -x == -y --> x == y - if (match(Op0, m_Neg(m_Value(A))) && - match(Op1, m_Neg(m_Value(B)))) - return new ICmpInst(I.getPredicate(), A, B); - - if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) { - if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 - Value *OtherVal = A == Op1 ? 
B : A;
-      return new ICmpInst(I.getPredicate(), OtherVal,
-                          Constant::getNullValue(A->getType()));
-    }
-
-    if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
-      // A^c1 == C^c2 --> A == C^(c1^c2)
-      ConstantInt *C1, *C2;
-      if (match(B, m_ConstantInt(C1)) &&
-          match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
-        Constant *NC =
-          ConstantInt::get(*Context, C1->getValue() ^ C2->getValue());
-        Value *Xor = Builder->CreateXor(C, NC, "tmp");
-        return new ICmpInst(I.getPredicate(), A, Xor);
-      }
-
-      // A^B == A^D -> B == D
-      if (A == C) return new ICmpInst(I.getPredicate(), B, D);
-      if (A == D) return new ICmpInst(I.getPredicate(), B, C);
-      if (B == C) return new ICmpInst(I.getPredicate(), A, D);
-      if (B == D) return new ICmpInst(I.getPredicate(), A, C);
-    }
-  }
-
-  if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
-      (A == Op0 || B == Op0)) {
-    // A == (A^B)  ->  B == 0
-    Value *OtherVal = A == Op0 ? B : A;
-    return new ICmpInst(I.getPredicate(), OtherVal,
-                        Constant::getNullValue(A->getType()));
-  }
-
-  // (A-B) == A  ->  B == 0
-  if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
-    return new ICmpInst(I.getPredicate(), B,
-                        Constant::getNullValue(B->getType()));
-
-  // A == (A-B)  ->  B == 0
-  if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
-    return new ICmpInst(I.getPredicate(), B,
-                        Constant::getNullValue(B->getType()));
-
-  // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
-  if (Op0->hasOneUse() && Op1->hasOneUse() &&
-      match(Op0, m_And(m_Value(A), m_Value(B))) &&
-      match(Op1, m_And(m_Value(C), m_Value(D)))) {
-    Value *X = 0, *Y = 0, *Z = 0;
-
-    if (A == C) {
-      X = B; Y = D; Z = A;
-    } else if (A == D) {
-      X = B; Y = C; Z = A;
-    } else if (B == C) {
-      X = A; Y = D; Z = B;
-    } else if (B == D) {
-      X = A; Y = C; Z = B;
-    }
-
-    if (X) {   // Build (X^Y) & Z
-      Op1 = Builder->CreateXor(X, Y, "tmp");
-      Op1 = Builder->CreateAnd(Op1, Z, "tmp");
-      I.setOperand(0, Op1);
-      I.setOperand(1, Constant::getNullValue(Op1->getType()));
-      return &I;
-    }
-  }
-  }
-
-  {
-    Value *X; ConstantInt *Cst;
-    // icmp X+Cst, X
-    if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
-      return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
-
-    // icmp X, X+Cst
-    if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
-      return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
-  }
-  return Changed ? &I : 0;
-}
-
-/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
-                                            Value *X, ConstantInt *CI,
-                                            ICmpInst::Predicate Pred,
-                                            Value *TheAdd) {
-  // If we have X+0, exit early (simplifying logic below) and let it get folded
-  // elsewhere.   icmp X+0, X  -> icmp X, X
-  if (CI->isZero()) {
-    bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
-    return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
-  }
-
-  // (X+4) == X -> false.
-  if (Pred == ICmpInst::ICMP_EQ)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
-
-  // (X+4) != X -> true.
-  if (Pred == ICmpInst::ICMP_NE)
-    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
-
-  // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
-  bool isNUW = false, isNSW = false;
-  if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
-    isNUW = Add->hasNoUnsignedWrap();
-    isNSW = Add->hasNoSignedWrap();
-  }
-
-  // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
-  // so the values can never be equal.  Similarly for all other "or equals"
-  // operators.
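The case analysis that follows is easiest to sanity-check at a small width: with wrapping i8 arithmetic, (X+1) >u X fails only when the add wraps, matching the MAXUINT comments below (standalone C++, uint8_t standing in for any width):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned x = 0; x < 256; ++x) {
      uint8_t X = (uint8_t)x;
      assert(((uint8_t)(X + 1) > X) == (X < 255));  // (X+1) >u X <-> X <u -1
      assert(((uint8_t)(X + 1) < X) == (X > 254));  // (X+1) <u X <-> X >u -2
    }
    return 0;
  }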
- - // (X+1) <u X --> X >u (MAXUINT-1) --> X != 255 - // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 - // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 - if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { - // If this is an NUW add, then this is always false. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); - - Value *R = ConstantExpr::getSub(ConstantInt::get(CI->getType(), -1ULL), CI); - return new ICmpInst(ICmpInst::ICMP_UGT, X, R); - } - - // (X+1) >u X --> X <u (0-1) --> X != 255 - // (X+2) >u X --> X <u (0-2) --> X <u 254 - // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 - if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { - // If this is an NUW add, then this is always true. - if (isNUW) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); - return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); - } - - unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); - ConstantInt *SMax = ConstantInt::get(X->getContext(), - APInt::getSignedMaxValue(BitWidth)); - - // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 - // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 - // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 - // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 - // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 - // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always false, if negative, this is always true. - if (isNSW) { - bool isTrue = CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); - } - - // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 - // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 - // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 - // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 - // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 - // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 - - // If this is an NSW add, then we have two cases: if the constant is - // positive, then this is always true, if negative, this is always false. - if (isNSW) { - bool isTrue = !CI->getValue().isNegative(); - return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); - } - - assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); - Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); - return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); -} - -/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS -/// and CmpRHS are both known to be integer constants. -Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, - ConstantInt *DivRHS) { - ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1)); - const APInt &CmpRHSV = CmpRHS->getValue(); - - // FIXME: If the operand types don't match the type of the divide - // then don't attempt this transform. The code below doesn't have the - // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) <s C2 produces different - // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even - // (x /u C1) <u C2. Simply casting the operands and result won't - // work. 
:(  The if statement below tests that condition and bails
-  // if it finds it.
-  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
-  if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
-    return 0;
-  if (DivRHS->isZero())
-    return 0; // The ProdOV computation fails on divide by zero.
-  if (DivIsSigned && DivRHS->isAllOnesValue())
-    return 0; // The overflow computation also screws up here
-  if (DivRHS->isOne())
-    return 0; // Not worth bothering, and eliminates some funny cases
-              // with INT_MIN.
-
-  // Compute Prod = CI * DivRHS. We are essentially solving an equation
-  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
-  // C2 (CI). By solving for X we can turn this into a range check
-  // instead of computing a divide.
-  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
-
-  // Determine if the product overflows by seeing if the product is
-  // not equal to the divide. Make sure we do the same kind of divide
-  // as in the LHS instruction that we're folding.
-  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
-                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
-
-  // Get the ICmp opcode
-  ICmpInst::Predicate Pred = ICI.getPredicate();
-
-  // Figure out the interval that is being checked.  For example, a comparison
-  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
-  // Compute this interval based on the constants involved and the signedness of
-  // the compare/divide.  This computes a half-open interval, keeping track of
-  // whether either value in the interval overflows.  After analysis each
-  // overflow variable is set to 0 if its corresponding bound variable is valid,
-  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
-  int LoOverflow = 0, HiOverflow = 0;
-  Constant *LoBound = 0, *HiBound = 0;
-
-  if (!DivIsSigned) {  // udiv
-    // e.g. X/5 op 3  --> [15, 20)
-    LoBound = Prod;
-    HiOverflow = LoOverflow = ProdOV;
-    if (!HiOverflow)
-      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, Context, false);
-  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
-    if (CmpRHSV == 0) {       // (X / pos) op 0
-      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
-      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
-      HiBound = DivRHS;
-    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
-      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
-      HiOverflow = LoOverflow = ProdOV;
-      if (!HiOverflow)
-        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true);
-    } else {                       // (X / pos) op neg
-      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
-      HiBound = AddOne(Prod);
-      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
-      if (!LoOverflow) {
-        ConstantInt* DivNeg =
-                 cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
-        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context,
-                                     true) ? -1 : 0;
-      }
-    }
-  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
-    if (CmpRHSV == 0) {       // (X / neg) op 0
-      // e.g. X/-5 op 0  --> [-4, 5)
-      LoBound = AddOne(DivRHS);
-      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
-      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
-        HiOverflow = 1;            // [INTMIN+1, overflow)
-        HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
-      }
-    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
-      // e.g. X/-5 op 3  --> [-19, -14)
-      HiBound = AddOne(Prod);
-      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
-      if (!LoOverflow)
-        LoOverflow = AddWithOverflow(LoBound, HiBound,
-                                     DivRHS, Context, true) ?
-1 : 0; - } else { // (X / neg) op neg - LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) - LoOverflow = HiOverflow = ProdOV; - if (!HiOverflow) - HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, Context, true); - } - - // Dividing by a negative swaps the condition. LT <-> GT - Pred = ICmpInst::getSwappedPredicate(Pred); - } - - Value *X = DivI->getOperand(0); - switch (Pred) { - default: llvm_unreachable("Unhandled icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); - case ICmpInst::ICMP_NE: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - if (LoOverflow == +1) // Low bound is greater than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - if (LoOverflow == -1) // Low bound is less than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - return new ICmpInst(Pred, X, LoBound); - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - if (HiOverflow == +1) // High bound greater than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - else if (HiOverflow == -1) // High bound less than input range. - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - if (Pred == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); - } -} - - -/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". -/// -Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHSI, - ConstantInt *RHS) { - const APInt &RHSV = RHS->getValue(); - - switch (LHSI->getOpcode()) { - case Instruction::Trunc: - if (ICI.isEquality() && LHSI->hasOneUse()) { - // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all - // of the high bits truncated out of x are known. - unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), - SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); - APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); - APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); - - // If all the high bits are known, we can do this xform. - if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { - // Pull in the high bits from known-ones set. - APInt NewRHS(RHS->getValue()); - NewRHS.zext(SrcBits); - NewRHS |= KnownOne; - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantInt::get(*Context, NewRHS)); - } - } - break; - - case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) - if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { - // If this is a comparison that tests the signbit (X < 0) or (x > -1), - // fold the xor. 
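The interval logic in FoldICmpDivCst above matches the worked example in its comments: "X /u 5 == 3" holds exactly on the half-open range [15, 20). Checked exhaustively at i8 (standalone, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned x = 0; x < 256; ++x) {
      uint8_t X = (uint8_t)x;
      assert((X / 5 == 3) == (15 <= X && X < 20));
    }
    return 0;
  }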
- if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || - (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { - Value *CompareVal = LHSI->getOperand(0); - - // If the sign bit of the XorCST is not set, there is no change to - // the operation, just stop using the Xor. - if (!XorCST->getValue().isNegative()) { - ICI.setOperand(0, CompareVal); - Worklist.Add(LHSI); - return &ICI; - } - - // Was the old condition true if the operand is positive? - bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT; - - // If so, the new one isn't. - isTrueIfPositive ^= true; - - if (isTrueIfPositive) - return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, - SubOne(RHS)); - else - return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, - AddOne(RHS)); - } - - if (LHSI->hasOneUse()) { - // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) - if (!ICI.isEquality() && XorCST->getValue().isSignBit()) { - const APInt &SignBit = XorCST->getValue(); - ICmpInst::Predicate Pred = ICI.isSigned() - ? ICI.getUnsignedPredicate() - : ICI.getSignedPredicate(); - return new ICmpInst(Pred, LHSI->getOperand(0), - ConstantInt::get(*Context, RHSV ^ SignBit)); - } - - // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { - const APInt &NotSignBit = XorCST->getValue(); - ICmpInst::Predicate Pred = ICI.isSigned() - ? ICI.getUnsignedPredicate() - : ICI.getSignedPredicate(); - Pred = ICI.getSwappedPredicate(Pred); - return new ICmpInst(Pred, LHSI->getOperand(0), - ConstantInt::get(*Context, RHSV ^ NotSignBit)); - } - } - } - break; - case Instruction::And: // (icmp pred (and X, AndCST), RHS) - if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) && - LHSI->getOperand(0)->hasOneUse()) { - ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1)); - - // If the LHS is an AND of a truncating cast, we can widen the - // and/compare to be the input width without changing the value - // produced, eliminating a cast. - if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) { - // We can do this transformation if either the AND constant does not - // have its sign bit set or if it is an equality comparison. - // Extending a relational comparison when we're checking the sign - // bit would not work. - if (Cast->hasOneUse() && - (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { - uint32_t BitWidth = - cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue(); - NewCST.zext(BitWidth); - APInt NewCI = RHSV; - NewCI.zext(BitWidth); - Value *NewAnd = - Builder->CreateAnd(Cast->getOperand(0), - ConstantInt::get(*Context, NewCST), LHSI->getName()); - return new ICmpInst(ICI.getPredicate(), NewAnd, - ConstantInt::get(*Context, NewCI)); - } - } - - // If this is: (X >> C1) & C2 != C3 (where any shift and any compare - // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This - // happens a LOT in code produced by the C front-end, for bitfield - // access. - BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); - if (Shift && !Shift->isShift()) - Shift = 0; - - ConstantInt *ShAmt; - ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; - const Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - const Type *AndTy = AndCST->getType(); // Type of the and. - - // We can fold this as long as we can't shift unknown bits - // into the mask. 
This can only happen with signed shift
-      // rights, as they sign-extend.
-      if (ShAmt) {
-        bool CanFold = Shift->isLogicalShift();
-        if (!CanFold) {
-          // To test for the bad case of the signed shr, see if any
-          // of the bits shifted in could be tested after the mask.
-          uint32_t TyBits = Ty->getPrimitiveSizeInBits();
-          int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
-
-          uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
-          if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
-               AndCST->getValue()) == 0)
-            CanFold = true;
-        }
-
-        if (CanFold) {
-          Constant *NewCst;
-          if (Shift->getOpcode() == Instruction::Shl)
-            NewCst = ConstantExpr::getLShr(RHS, ShAmt);
-          else
-            NewCst = ConstantExpr::getShl(RHS, ShAmt);
-
-          // Check to see if we are shifting out any of the bits being
-          // compared.
-          if (ConstantExpr::get(Shift->getOpcode(),
-                                NewCst, ShAmt) != RHS) {
-            // If we shifted bits out, the fold is not going to work out.
-            // As a special case, check to see if this means that the
-            // result is always true or false now.
-            if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
-            if (ICI.getPredicate() == ICmpInst::ICMP_NE)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
-          } else {
-            ICI.setOperand(1, NewCst);
-            Constant *NewAndCST;
-            if (Shift->getOpcode() == Instruction::Shl)
-              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
-            else
-              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
-            LHSI->setOperand(1, NewAndCST);
-            LHSI->setOperand(0, Shift->getOperand(0));
-            Worklist.Add(Shift); // Shift is dead.
-            return &ICI;
-          }
-        }
-      }
-
-      // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0.  The latter is
-      // preferable because it allows the C<<Y expression to be hoisted out
-      // of a loop if Y is invariant and X is not.
-      if (Shift && Shift->hasOneUse() && RHSV == 0 &&
-          ICI.isEquality() && !Shift->isArithmeticShift() &&
-          !isa<Constant>(Shift->getOperand(0))) {
-        // Compute C << Y.
-        Value *NS;
-        if (Shift->getOpcode() == Instruction::LShr) {
-          NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");
-        } else {
-          // Insert a logical shift.
-          NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");
-        }
-
-        // Compute X & (C << Y).
-        Value *NewAnd =
-          Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
-
-        ICI.setOperand(0, NewAnd);
-        return &ICI;
-      }
-    }
-    break;
-
-  case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
-    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
-    if (!ShAmt) break;
-
-    uint32_t TypeBits = RHSV.getBitWidth();
-
-    // Check that the shift amount is in range.  If not, don't perform
-    // undefined shifts.  When the shift is visited it will be
-    // simplified.
-    if (ShAmt->uge(TypeBits))
-      break;
-
-    if (ICI.isEquality()) {
-      // If we are comparing against bits always shifted out, the
-      // comparison cannot succeed.
-      Constant *Comp =
-        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
-                             ShAmt);
-      if (Comp != RHS) {// Comparing against a bit that we know is zero.
-        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-        Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);
-        return ReplaceInstUsesWith(ICI, Cst);
-      }
-
-      if (LHSI->hasOneUse()) {
-        // Otherwise strength reduce the shift into an and.
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - Constant *Mask = - ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits, - TypeBits-ShAmtVal)); - - Value *And = - Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantInt::get(*Context, RHSV.lshr(ShAmtVal))); - } - } - - // Otherwise, if this is a comparison of the sign bit, simplify to and/test. - bool TrueIfSigned = false; - if (LHSI->hasOneUse() && - isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { - // (X << 31) <s 0 --> (X&1) != 0 - Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) << - (TypeBits-ShAmt->getZExtValue()-1)); - Value *And = - Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); - return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, - And, Constant::getNullValue(And->getType())); - } - break; - } - - case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: { - // Only handle equality comparisons of shift-by-constant. - ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); - if (!ShAmt || !ICI.isEquality()) break; - - // Check that the shift amount is in range. If not, don't perform - // undefined shifts. When the shift is visited it will be - // simplified. - uint32_t TypeBits = RHSV.getBitWidth(); - if (ShAmt->uge(TypeBits)) - break; - - uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); - - // If we are comparing against bits always shifted out, the - // comparison cannot succeed. - APInt Comp = RHSV << ShAmtVal; - if (LHSI->getOpcode() == Instruction::LShr) - Comp = Comp.lshr(ShAmtVal); - else - Comp = Comp.ashr(ShAmtVal); - - if (Comp != RHSV) { // Comparing against a bit that we know is zero. - bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE); - return ReplaceInstUsesWith(ICI, Cst); - } - - // Otherwise, check to see if the bits shifted out are known to be zero. - // If so, we can compare against the unshifted value: - // (X & 4) >> 1 == 2 --> (X & 4) == 4. - if (LHSI->hasOneUse() && - MaskedValueIsZero(LHSI->getOperand(0), - APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { - return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - ConstantExpr::getShl(RHS, ShAmt)); - } - - if (LHSI->hasOneUse()) { - // Otherwise strength reduce the shift into an and. - APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(*Context, Val); - - Value *And = Builder->CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - return new ICmpInst(ICI.getPredicate(), And, - ConstantExpr::getShl(RHS, ShAmt)); - } - break; - } - - case Instruction::SDiv: - case Instruction::UDiv: - // Fold: icmp pred ([us]div X, C1), C2 -> range test - // Fold this div into the comparison, producing a range check. - // Determine, based on the divide type, what the range is being - // checked. If there is an overflow on the low or high side, remember - // it, otherwise compute the range [low, hi) bounding the new value. - // See: InsertRangeTest above for the kinds of replacements possible. 
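The known-zero-bits rewrite in the lshr/ashr case just above — "(X & 4) >> 1 == 2 --> (X & 4) == 4" — checked exhaustively at i8 (standalone, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned x = 0; x < 256; ++x) {
      uint8_t X = (uint8_t)x;
      // The bit shifted out is known zero, so comparing the unshifted
      // value against the re-shifted constant is equivalent.
      assert((((X & 4) >> 1) == 2) == ((X & 4) == 4));
    }
    return 0;
  }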
- if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) - if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI), - DivRHS)) - return R; - break; - - case Instruction::Add: - // Fold: icmp pred (add X, C1), C2 - if (!ICI.isEquality()) { - ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1)); - if (!LHSC) break; - const APInt &LHSV = LHSC->getValue(); - - ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV) - .subtract(LHSV); - - if (ICI.isSigned()) { - if (CR.getLower().isSignBit()) { - return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getUpper())); - } else if (CR.getUpper().isSignBit()) { - return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getLower())); - } - } else { - if (CR.getLower().isMinValue()) { - return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getUpper())); - } else if (CR.getUpper().isMinValue()) { - return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), - ConstantInt::get(*Context, CR.getLower())); - } - } - } - break; - } - - // Simplify icmp_eq and icmp_ne instructions with integer constant RHS. - if (ICI.isEquality()) { - bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - - // If the first operand is (add|sub|and|or|xor|rem) with a constant, and - // the second operand is a constant, simplify a bit. - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) { - switch (BO->getOpcode()) { - case Instruction::SRem: - // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ - const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); - if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { - Value *NewRem = - Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), - BO->getName()); - return new ICmpInst(ICI.getPredicate(), NewRem, - Constant::getNullValue(BO->getType())); - } - } - break; - case Instruction::Add: - // Replace ((add A, B) != C) with (A != C-B) if B & C are constants. - if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { - if (BO->hasOneUse()) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - ConstantExpr::getSub(RHS, BOp1C)); - } else if (RHSV == 0) { - // Replace ((add A, B) != 0) with (A != -B) if A or B is - // efficiently invertible, or if the add has just this one use. - Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - - if (Value *NegVal = dyn_castNegVal(BOp1)) - return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) - return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { - Value *Neg = Builder->CreateNeg(BOp1); - Neg->takeName(BO); - return new ICmpInst(ICI.getPredicate(), BOp0, Neg); - } - } - break; - case Instruction::Xor: - // For the xor case, we can xor two constants together, eliminating - // the explicit xor. - if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - ConstantExpr::getXor(RHS, BOC)); - - // FALLTHROUGH - case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (RHSV == 0) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - BO->getOperand(1)); - break; - - case Instruction::Or: - // If bits are being or'd in that are not present in the constant we - // are comparing against, then the comparison could never succeed! 
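The or fold described in the comment above: any bit set by the or but clear in the compared constant makes equality impossible, e.g. (X | 8) == 4 never holds because bit 3 is always set on the left. A quick i8 check (standalone):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned x = 0; x < 256; ++x)
      assert((uint8_t)(x | 8) != 4);
    return 0;
  }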
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { - Constant *NotCI = ConstantExpr::getNot(RHS); - if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) - return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(*Context), - isICMP_NE)); - } - break; - - case Instruction::And: - if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { - // If bits are being compared against that are and'd out, then the - // comparison can never succeed! - if ((RHSV & ~BOC->getValue()) != 0) - return ReplaceInstUsesWith(ICI, - ConstantInt::get(Type::getInt1Ty(*Context), - isICMP_NE)); - - // If we have ((X & C) == C), turn it into ((X & C) != 0). - if (RHS == BOC && RHSV.isPowerOf2()) - return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : - ICmpInst::ICMP_NE, LHSI, - Constant::getNullValue(RHS->getType())); - - // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->getValue().isSignBit()) { - Value *X = BO->getOperand(0); - Constant *Zero = Constant::getNullValue(X->getType()); - ICmpInst::Predicate pred = isICMP_NE ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; - return new ICmpInst(pred, X, Zero); - } - - // ((X & ~7) == 0) --> X < 8 - if (RHSV == 0 && isHighOnes(BOC)) { - Value *X = BO->getOperand(0); - Constant *NegX = ConstantExpr::getNeg(BOC); - ICmpInst::Predicate pred = isICMP_NE ? - ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; - return new ICmpInst(pred, X, NegX); - } - } - default: break; - } - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { - // Handle icmp {eq|ne} <intrinsic>, intcst. - if (II->getIntrinsicID() == Intrinsic::bswap) { - Worklist.Add(II); - ICI.setOperand(0, II->getOperand(1)); - ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap())); - return &ICI; - } - } - } - return 0; -} - -/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). -/// We only handle extending casts so far. -/// -Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { - const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0)); - Value *LHSCIOp = LHSCI->getOperand(0); - const Type *SrcTy = LHSCIOp->getType(); - const Type *DestTy = LHSCI->getType(); - Value *RHSCIOp; - - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the - // integer type is the same size as the pointer type. - if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits() == - cast<IntegerType>(DestTy)->getBitWidth()) { - Value *RHSOp = 0; - if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { - RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); - } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { - RHSOp = RHSC->getOperand(0); - // If the pointer types don't match, insert a bitcast. - if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); - } - - if (RHSOp) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp); - } - - // The code below only handles extension cast instructions, so far. - // Enforce this. - if (LHSCI->getOpcode() != Instruction::ZExt && - LHSCI->getOpcode() != Instruction::SExt) - return 0; - - bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; - bool isSignedCmp = ICI.isSigned(); - - if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) { - // Not an extension from the same type? - RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) - return 0; - - // If the signedness of the two casts doesn't agree (i.e. 
one is a sext - // and the other is a zext), then we can't handle this. - if (CI->getOpcode() != LHSCI->getOpcode()) - return 0; - - // Deal with equality cases early. - if (ICI.isEquality()) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); - - // A signed comparison of sign extended values simplifies into a - // signed comparison. - if (isSignedCmp && isSignedExt) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp); - - // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp); - } - - // If we aren't dealing with a constant on the RHS, exit early - ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); - if (!CI) - return 0; - - // Compute the constant that would happen if we truncated to SrcTy then - // reextended to DestTy. - Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); - Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), - Res1, DestTy); - - // If the re-extended constant didn't change... - if (Res2 == CI) { - // Deal with equality cases early. - if (ICI.isEquality()) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); - - // A signed comparison of sign extended values simplifies into a - // signed comparison. - if (isSignedExt && isSignedCmp) - return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); - - // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); - } - - // The re-extended constant changed so the constant cannot be represented - // in the shorter type. Consequently, we cannot emit a simple comparison. - - // First, handle some easy cases. We know the result cannot be equal at this - // point so handle the ICI.isEquality() cases - if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); - if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); - - // Evaluate the comparison for LT (we invert for GT below). LE and GE cases - // should have been folded away previously and not enter in here. - Value *Result; - if (isSignedCmp) { - // We're performing a signed comparison. - if (cast<ConstantInt>(CI)->getValue().isNegative()) - Result = ConstantInt::getFalse(*Context); // X < (small) --> false - else - Result = ConstantInt::getTrue(*Context); // X < (large) --> true - } else { - // We're performing an unsigned comparison. - if (isSignedExt) { - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); - } else { - // Unsigned extend & unsigned compare -> always true. - Result = ConstantInt::getTrue(*Context); - } - } - - // Finally, return the value computed. 
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT || - ICI.getPredicate() == ICmpInst::ICMP_SLT) - return ReplaceInstUsesWith(ICI, Result); - - assert((ICI.getPredicate()==ICmpInst::ICMP_UGT || - ICI.getPredicate()==ICmpInst::ICMP_SGT) && - "ICmp should be folded!"); - if (Constant *CI = dyn_cast<Constant>(Result)) - return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); - return BinaryOperator::CreateNot(Result); -} - -Instruction *InstCombiner::visitShl(BinaryOperator &I) { - return commonShiftTransforms(I); -} - -Instruction *InstCombiner::visitLShr(BinaryOperator &I) { - return commonShiftTransforms(I); -} - -Instruction *InstCombiner::visitAShr(BinaryOperator &I) { - if (Instruction *R = commonShiftTransforms(I)) - return R; - - Value *Op0 = I.getOperand(0); - - // ashr int -1, X = -1 (for any arithmetic shift rights of ~0) - if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) - if (CSI->isAllOnesValue()) - return ReplaceInstUsesWith(I, CSI); - - // See if we can turn a signed shr into an unsigned shr. - if (MaskedValueIsZero(Op0, - APInt::getSignBit(I.getType()->getScalarSizeInBits()))) - return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); - - // Arithmetic shifting an all-sign-bit value is a no-op. - unsigned NumSignBits = ComputeNumSignBits(Op0); - if (NumSignBits == Op0->getType()->getScalarSizeInBits()) - return ReplaceInstUsesWith(I, Op0); - - return 0; -} - -Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { - assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // shl X, 0 == X and shr X, 0 == X - // shl 0, X == 0 and shr 0, X == 0 - if (Op1 == Constant::getNullValue(Op1->getType()) || - Op0 == Constant::getNullValue(Op0->getType())) - return ReplaceInstUsesWith(I, Op0); - - if (isa<UndefValue>(Op0)) { - if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef - return ReplaceInstUsesWith(I, Op0); - else // undef << X -> 0, undef >>u X -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - if (isa<UndefValue>(Op1)) { - if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X - return ReplaceInstUsesWith(I, Op0); - else // X << undef, X >>u undef -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - } - - // See if we can fold away this shift. - if (SimplifyDemandedInstructionBits(I)) - return &I; - - // Try to fold constant and into select arguments. - if (isa<Constant>(Op0)) - if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - - if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) - if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) - return Res; - return 0; -} - -Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, - BinaryOperator &I) { - bool isLeftShift = I.getOpcode() == Instruction::Shl; - - // See if we can simplify any instructions used by the instruction whose sole - // purpose is to compute bits we don't care about. - uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - - // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate - // a signed shift. 
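// ---- [editor's aside; not part of the diff] ----
// Standalone sanity check for why the code below clamps an oversized ashr
// amount to TypeBits-1 instead of folding it to zero: an arithmetic shift by
// width-1 replicates the sign bit, yielding 0 or -1. Assumes >> on a signed
// operand is an arithmetic shift, as on mainstream compilers.
#include <cassert>
#include <cstdint>
int main() {
  for (int32_t X : {0, 1, 42, -1, -42, INT32_MIN, INT32_MAX})
    assert((X >> 31) == (X < 0 ? -1 : 0));
}
// ---- [end aside] ----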
- // - if (Op1->uge(TypeBits)) { - if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); - else { - I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); - return &I; - } - } - - // ((X*C1) << C2) == (X * (C1 << C2)) - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) - if (BO->getOpcode() == Instruction::Mul && isLeftShift) - if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) - return BinaryOperator::CreateMul(BO->getOperand(0), - ConstantExpr::getShl(BOOp, Op1)); - - // Try to fold constant and into select arguments. - if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *R = FoldOpIntoSelect(I, SI, this)) - return R; - if (isa<PHINode>(Op0)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - - // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) - if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { - Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); - // If 'shift2' is an ashr, we would have to get the sign bit into a funny - // place. Don't try to do this transformation in this case. Also, we - // require that the input operand is a shift-by-constant so that we have - // confidence that the shifts will get folded together. We could do this - // xform in more cases, but it is unlikely to be profitable. - if (TrOp && I.isLogicalShift() && TrOp->isShift() && - isa<ConstantInt>(TrOp->getOperand(1))) { - // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); - // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); - - // For logical shifts, the truncation has the effect of making the high - // part of the register be zeros. Emulate this by inserting an AND to - // clear the top bits as needed. This 'and' will usually be zapped by - // other xforms later if dead. - unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); - unsigned DstSize = TI->getType()->getScalarSizeInBits(); - APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); - - // The mask we constructed says what the trunc would do if occurring - // between the shifts. We want to know the effect *after* the second - // shift. We know that it is a logical shift by a constant, so adjust the - // mask as appropriate. - if (I.getOpcode() == Instruction::Shl) - MaskV <<= Op1->getZExtValue(); - else { - assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1->getZExtValue()); - } - - // shift1 & 0x00FF - Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV), - TI->getName()); - - // Return the value truncated to the interesting size. - return new TruncInst(And, I.getType()); - } - } - - if (Op0->hasOneUse()) { - if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { - // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) - Value *V1, *V2; - ConstantInt *CC; - switch (Op0BO->getOpcode()) { - default: break; - case Instruction::Add: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // These operators commute. 
- // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
- match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
- Op0BO->getOperand(1)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
- }
-
- // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
- Value *Op0BOOp1 = Op0BO->getOperand(1);
- if (isLeftShift && Op0BOOp1->hasOneUse() &&
- match(Op0BOOp1,
- m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
- m_ConstantInt(CC))) &&
- cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(0), Op1,
- Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
- return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
- }
- }
-
- // FALL THROUGH.
- case Instruction::Sub: {
- // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
- Op0BO->getOperand(0)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
- }
-
- // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0),
- m_And(m_Shr(m_Value(V1), m_Value(V2)),
- m_ConstantInt(CC))) && V2 == Op1 &&
- cast<BinaryOperator>(Op0BO->getOperand(0))
- ->getOperand(0)->hasOneUse()) {
- Value *YS = // (Y << C)
- Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
- V1->getName()+".mask");
-
- return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
- }
-
- break;
- }
- }
-
-
- // If the operand is a bitwise operator with a constant RHS, and the
- // shift is the only use, we can pull it out of the shift.
- if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
- bool isValid = true; // Valid only for And, Or, Xor
- bool highBitSet = false; // Transform if high bit of constant set?
-
- switch (Op0BO->getOpcode()) {
- default: isValid = false; break; // Do not perform transform!
- case Instruction::Add:
- isValid = isLeftShift;
- break;
- case Instruction::Or:
- case Instruction::Xor:
- highBitSet = false;
- break;
- case Instruction::And:
- highBitSet = true;
- break;
- }
-
- // If this is a signed shift right, and the high bit is modified
- // by the logical operation, do not perform the transformation.
- // The highBitSet boolean indicates the value of the high bit of
- // the constant which would cause it to be modified for this
- // operation.
- // - if (isValid && I.getOpcode() == Instruction::AShr) - isValid = Op0C->getValue()[TypeBits-1] == highBitSet; - - if (isValid) { - Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); - - Value *NewShift = - Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); - NewShift->takeName(Op0BO); - - return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, - NewRHS); - } - } - } - } - - // Find out if this is a shift of a shift by a constant. - BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); - if (ShiftOp && !ShiftOp->isShift()) - ShiftOp = 0; - - if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { - ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); - uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); - uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); - assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); - if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. - Value *X = ShiftOp->getOperand(0); - - uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - - const IntegerType *Ty = cast<IntegerType>(I.getType()); - - // Check for (X << c1) << c2 and (X >> c1) >> c2 - if (I.getOpcode() == ShiftOp->getOpcode()) { - // If this is oversized composite shift, then unsigned shifts get 0, ashr - // saturates. - if (AmtSum >= TypeBits) { - if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. - } - - return BinaryOperator::Create(I.getOpcode(), X, - ConstantInt::get(Ty, AmtSum)); - } - - if (ShiftOp->getOpcode() == Instruction::LShr && - I.getOpcode() == Instruction::AShr) { - if (AmtSum >= TypeBits) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. - return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); - } - - if (ShiftOp->getOpcode() == Instruction::AShr && - I.getOpcode() == Instruction::LShr) { - // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. - if (AmtSum >= TypeBits) - AmtSum = TypeBits-1; - - Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask)); - } - - // Okay, if we get here, one shift must be left, and the other shift must be - // right. See if the amounts are equal. - if (ShiftAmt1 == ShiftAmt2) { - // If we have ((X >>? C) << C), turn this into X & (-1 << C). - if (I.getOpcode() == Instruction::Shl) { - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask)); - } - // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). - if (I.getOpcode() == Instruction::LShr) { - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask)); - } - // We can simplify ((X << C) >>s C) into a trunc + sext. - // NOTE: we could do this for any C, but that would make 'unusual' integer - // types. For now, just stick to ones well-supported by the code - // generators. 
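// ---- [editor's aside; not part of the diff] ----
// Standalone check of the equivalence the switch below exploits for
// ((X << C) >>s C): shifting up and arithmetically back down by C
// sign-extends the low (width - C) bits, i.e. it equals trunc-then-sext.
// Assumes arithmetic >> on signed operands and two's-complement truncation.
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t I = 0; I <= 0xFFFF; ++I) {
    int16_t X = (int16_t)I;
    int16_t ShlAshr = (int16_t)((int16_t)((uint16_t)X << 8) >> 8);
    int16_t TruncSext = (int16_t)(int8_t)X; // trunc to i8, sext back to i16
    assert(ShlAshr == TruncSext);
  }
}
// ---- [end aside] ----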
- const Type *SExtType = 0; - switch (Ty->getBitWidth() - ShiftAmt1) { - case 1 : - case 8 : - case 16 : - case 32 : - case 64 : - case 128: - SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1); - break; - default: break; - } - if (SExtType) - return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty); - // Otherwise, we can't handle it yet. - } else if (ShiftAmt1 < ShiftAmt2) { - uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; - - // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); - - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(*Context, Mask)); - } - - // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); - Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(*Context, Mask)); - } - - // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. - } else { - assert(ShiftAmt2 < ShiftAmt1); - uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; - - // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); - Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, - ConstantInt::get(Ty, ShiftDiff)); - - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(*Context, Mask)); - } - - // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); - - APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(*Context, Mask)); - } - - // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. - } - } - return 0; -} - - -/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear -/// expression. If so, decompose it, returning some value X, such that Val is -/// X*Scale+Offset. -/// -static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, - int &Offset, LLVMContext *Context) { - assert(Val->getType() == Type::getInt32Ty(*Context) && - "Unexpected allocation size type!"); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { - Offset = CI->getZExtValue(); - Scale = 0; - return ConstantInt::get(Type::getInt32Ty(*Context), 0); - } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { - if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { - if (I->getOpcode() == Instruction::Shl) { - // This is a value scaled by '1 << the shift amt'. - Scale = 1U << RHS->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Mul) { - // This value is scaled by 'RHS'. - Scale = RHS->getZExtValue(); - Offset = 0; - return I->getOperand(0); - } else if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, - // where C1 is divisible by C2. 
- unsigned SubScale; - Value *SubVal = - DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, - Offset, Context); - Offset += RHS->getZExtValue(); - Scale = SubScale; - return SubVal; - } - } - } - - // Otherwise, we can't look past this. - Scale = 1; - Offset = 0; - return Val; -} - - -/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, -/// try to eliminate the cast by moving the type information into the alloc. -Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, - AllocaInst &AI) { - const PointerType *PTy = cast<PointerType>(CI.getType()); - - BuilderTy AllocaBuilder(*Builder); - AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); - - // Remove any uses of AI that are dead. - assert(!CI.use_empty() && "Dead instructions should be removed earlier!"); - - for (Value::use_iterator UI = AI.use_begin(), E = AI.use_end(); UI != E; ) { - Instruction *User = cast<Instruction>(*UI++); - if (isInstructionTriviallyDead(User)) { - while (UI != E && *UI == User) - ++UI; // If this instruction uses AI more than once, don't break UI. - - ++NumDeadInst; - DEBUG(errs() << "IC: DCE: " << *User << '\n'); - EraseInstFromFunction(*User); - } - } - - // This requires TargetData to get the alloca alignment and size information. - if (!TD) return 0; - - // Get the type really allocated and the type casted to. - const Type *AllocElTy = AI.getAllocatedType(); - const Type *CastElTy = PTy->getElementType(); - if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; - - unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy); - unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy); - if (CastElTyAlign < AllocElTyAlign) return 0; - - // If the allocation has multiple uses, only promote it if we are strictly - // increasing the alignment of the resultant allocation. If we keep it the - // same, we open the door to infinite loops of various kinds. (A reference - // from a dbg.declare doesn't count as a use for this purpose.) - if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && - CastElTyAlign == AllocElTyAlign) return 0; - - uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); - uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); - if (CastElTySize == 0 || AllocElTySize == 0) return 0; - - // See if we can satisfy the modulus by pulling a scale out of the array - // size argument. - unsigned ArraySizeScale; - int ArrayOffset; - Value *NumElements = // See if the array size is a decomposable linear expr. - DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, - ArrayOffset, Context); - - // If we can now satisfy the modulus, by using a non-1 scale, we really can - // do the xform. - if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || - (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; - - unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; - Value *Amt = 0; - if (Scale == 1) { - Amt = NumElements; - } else { - Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale); - // Insert before the alloca, not before the cast. - Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); - } - - if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { - Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true); - Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); - } - - AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); - New->setAlignment(AI.getAlignment()); - New->takeName(&AI); - - // If the allocation has one real use plus a dbg.declare, just remove the - // declare. 
- if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
- EraseInstFromFunction(*DI);
- }
- // If the allocation has multiple real uses, insert a cast and change all
- // things that used it to use the new cast. This will also hack on CI, but it
- // will die soon.
- else if (!AI.hasOneUse()) {
- // New is the allocation instruction, pointer typed. AI is the original
- // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
- Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
- AI.replaceAllUsesWith(NewCast);
- }
- return ReplaceInstUsesWith(CI, New);
-}
-
-/// CanEvaluateInDifferentType - Return true if we can take the specified value
-/// and return it as type Ty without inserting any new casts and without
-/// changing the computed value. This is used by code that tries to decide
-/// whether promoting or shrinking integer operations to wider or smaller types
-/// will allow us to eliminate a truncate or extend.
-///
-/// This is a truncation operation if Ty is smaller than V->getType(), or an
-/// extension operation if Ty is larger.
-///
-/// If CastOpc is a truncation, then Ty will be a type smaller than V. We
-/// should return true if trunc(V) can be computed by computing V in the smaller
-/// type. If V is an instruction, then trunc(inst(x,y)) can be computed as
-/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
-/// efficiently truncated.
-///
-/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
-/// be computed in a larger type, which is then and'd or sext_in_reg'd to get
-/// the final result.
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
- unsigned CastOpc,
- int &NumCastsRemoved){
- // We can always evaluate constants in another type.
- if (isa<Constant>(V))
- return true;
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return false;
-
- const Type *OrigTy = V->getType();
-
- // If this is an extension or truncate, we can often eliminate it.
- if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
- // If this is a cast from the destination type, we can trivially eliminate
- // it, and this will remove a cast overall.
- if (I->getOperand(0)->getType() == Ty) {
- // If the first operand is itself a cast, and is eliminable, do not count
- // this as an eliminable cast. We would prefer to eliminate those two
- // casts first.
- if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
- ++NumCastsRemoved;
- return true;
- }
- }
-
- // We can't extend or shrink something that has multiple uses: doing so would
- // require duplicating the instruction in general, which isn't profitable.
- if (!I->hasOneUse()) return false;
-
- unsigned Opc = I->getOpcode();
- switch (Opc) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // These operators can all arbitrarily be extended or truncated.
- return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
- NumCastsRemoved) &&
- CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
- NumCastsRemoved);
-
- case Instruction::UDiv:
- case Instruction::URem: {
- // UDiv and URem can be truncated if all the truncated bits are zero.
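// ---- [editor's aside; not part of the diff] ----
// Standalone check of the UDiv/URem case above: when both operands already
// fit in the narrow type (the bits that truncation would drop are zero),
// dividing in the wide type and truncating matches narrow division exactly.
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t A = 0; A < 256; ++A)
    for (uint32_t B = 1; B < 256; ++B) { // B != 0 keeps the division defined
      assert((uint8_t)(A / B) == (uint8_t)((uint8_t)A / (uint8_t)B));
      assert((uint8_t)(A % B) == (uint8_t)((uint8_t)A % (uint8_t)B));
    }
}
// ---- [end aside] ----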
- uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (BitWidth < OrigBitWidth) { - APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); - if (MaskedValueIsZero(I->getOperand(0), Mask) && - MaskedValueIsZero(I->getOperand(1), Mask)) { - return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, - NumCastsRemoved) && - CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, - NumCastsRemoved); - } - } - break; - } - case Instruction::Shl: - // If we are truncating the result of this SHL, and if it's a shift of a - // constant amount, we can always perform a SHL in a smaller type. - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (BitWidth < OrigTy->getScalarSizeInBits() && - CI->getLimitedValue(BitWidth) < BitWidth) - return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, - NumCastsRemoved); - } - break; - case Instruction::LShr: - // If this is a truncate of a logical shr, we can truncate it to a smaller - // lshr iff we know that the bits we would otherwise be shifting in are - // already zeros. - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); - uint32_t BitWidth = Ty->getScalarSizeInBits(); - if (BitWidth < OrigBitWidth && - MaskedValueIsZero(I->getOperand(0), - APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && - CI->getLimitedValue(BitWidth) < BitWidth) { - return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, - NumCastsRemoved); - } - } - break; - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::Trunc: - // If this is the same kind of case as our original (e.g. zext+zext), we - // can safely replace it. Note that replacing it does not reduce the number - // of casts in the input. - if (Opc == CastOpc) - return true; - - // sext (zext ty1), ty2 -> zext ty2 - if (CastOpc == Instruction::SExt && Opc == Instruction::ZExt) - return true; - break; - case Instruction::Select: { - SelectInst *SI = cast<SelectInst>(I); - return CanEvaluateInDifferentType(SI->getTrueValue(), Ty, CastOpc, - NumCastsRemoved) && - CanEvaluateInDifferentType(SI->getFalseValue(), Ty, CastOpc, - NumCastsRemoved); - } - case Instruction::PHI: { - // We can change a phi if we can change all operands. - PHINode *PN = cast<PHINode>(I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc, - NumCastsRemoved)) - return false; - return true; - } - default: - // TODO: Can handle more cases here. - break; - } - - return false; -} - -/// EvaluateInDifferentType - Given an expression that -/// CanEvaluateInDifferentType returns true for, actually insert the code to -/// evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, - bool isSigned) { - if (Constant *C = dyn_cast<Constant>(V)) - return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); - - // Otherwise, it must be an instruction. 
- Instruction *I = cast<Instruction>(V); - Instruction *Res = 0; - unsigned Opc = I->getOpcode(); - switch (Opc) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::AShr: - case Instruction::LShr: - case Instruction::Shl: - case Instruction::UDiv: - case Instruction::URem: { - Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); - Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); - Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - // If the source type of the cast is the type we're trying for then we can - // just return the source. There's no need to insert it because it is not - // new. - if (I->getOperand(0)->getType() == Ty) - return I->getOperand(0); - - // Otherwise, must be the same type of cast, so just reinsert a new one. - Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty); - break; - case Instruction::Select: { - Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); - Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned); - Res = SelectInst::Create(I->getOperand(0), True, False); - break; - } - case Instruction::PHI: { - PHINode *OPN = cast<PHINode>(I); - PHINode *NPN = PHINode::Create(Ty); - for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { - Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); - NPN->addIncoming(V, OPN->getIncomingBlock(i)); - } - Res = NPN; - break; - } - default: - // TODO: Can handle more cases here. - llvm_unreachable("Unreachable!"); - break; - } - - Res->takeName(I); - return InsertNewInstBefore(Res, *I); -} - -/// @brief Implement the transforms common to all CastInst visitors. -Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { - Value *Src = CI.getOperand(0); - - // Many cases of "cast of a cast" are eliminable. If it's eliminable we just - // eliminate it now. - if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast - if (Instruction::CastOps opc = - isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { - // The first cast (CSrc) is eliminable so we need to fix up or replace - // the second cast (CI). CSrc will then have a good chance of being dead. - return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); - } - } - - // If we are casting a select then fold the cast into the select - if (SelectInst *SI = dyn_cast<SelectInst>(Src)) - if (Instruction *NV = FoldOpIntoSelect(CI, SI, this)) - return NV; - - // If we are casting a PHI then fold the cast into the PHI - if (isa<PHINode>(Src)) { - // We don't do this if this would create a PHI node with an illegal type if - // it is currently legal. - if (!isa<IntegerType>(Src->getType()) || - !isa<IntegerType>(CI.getType()) || - ShouldChangeType(CI.getType(), Src->getType(), TD)) - if (Instruction *NV = FoldOpIntoPhi(CI)) - return NV; - } - - return 0; -} - -/// FindElementAtOffset - Given a type and a constant offset, determine whether -/// or not there is a sequence of GEP indices into the type that will land us at -/// the specified offset. If so, fill them into NewIndices and return the -/// resultant element type, otherwise return null. 
-static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, - SmallVectorImpl<Value*> &NewIndices, - const TargetData *TD, - LLVMContext *Context) { - if (!TD) return 0; - if (!Ty->isSized()) return 0; - - // Start with the index over the outer type. Note that the type size - // might be zero (even if the offset isn't zero) if the indexed type - // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(*Context); - int64_t FirstIdx = 0; - if (int64_t TySize = TD->getTypeAllocSize(Ty)) { - FirstIdx = Offset/TySize; - Offset -= FirstIdx*TySize; - - // Handle hosts where % returns negative instead of values [0..TySize). - if (Offset < 0) { - --FirstIdx; - Offset += TySize; - assert(Offset >= 0); - } - assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); - } - - NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); - - // Index into the types. If we fail, set OrigBase to null. - while (Offset) { - // Indexing into tail padding between struct/array elements. - if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) - return 0; - - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TD->getStructLayout(STy); - assert(Offset < (int64_t)SL->getSizeInBytes() && - "Offset must stay within the indexed type"); - - unsigned Elt = SL->getElementContainingOffset(Offset); - NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt)); - - Offset -= SL->getElementOffset(Elt); - Ty = STy->getElementType(Elt); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { - uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); - assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); - Offset %= EltSize; - Ty = AT->getElementType(); - } else { - // Otherwise, we can't index into the middle of this atomic type, bail. - return 0; - } - } - - return Ty; -} - -/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) -Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { - Value *Src = CI.getOperand(0); - - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { - // If casting the result of a getelementptr instruction with no offset, turn - // this into a cast of the original pointer! - if (GEP->hasAllZeroIndices()) { - // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another - // pointer operand so the opcode doesn't need to change. - Worklist.Add(GEP); - CI.setOperand(0, GEP->getOperand(0)); - return &CI; - } - - // If the GEP has a single use, and the base pointer is a bitcast, and the - // GEP computes a constant offset, see if we can convert these three - // instructions into fewer. This typically happens with unions and other - // non-type-safe code. - if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) { - if (GEP->hasAllConstantIndices()) { - // We are guaranteed to get a constant from EmitGEPOffset. - ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, *this)); - int64_t Offset = OffsetV->getSExtValue(); - - // Get the base pointer input of the bitcast, and the type it points to. 
- Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- const Type *GEPIdxTy =
- cast<PointerType>(OrigBase->getType())->getElementType();
- SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD, Context)) {
- // If we were able to index down into an element, create the GEP
- // and bitcast the result. This eliminates one bitcast, potentially
- // two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase,
- NewIndices.begin(), NewIndices.end()) :
- Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
- NGEP->takeName(GEP);
-
- if (isa<BitCastInst>(CI))
- return new BitCastInst(NGEP, CI.getType());
- assert(isa<PtrToIntInst>(CI));
- return new PtrToIntInst(NGEP, CI.getType());
- }
- }
- }
- }
-
- return commonCastTransforms(CI);
-}
-
-/// commonIntCastTransforms - This function implements the common transforms
-/// for trunc, zext, and sext.
-Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
- if (Instruction *Result = commonCastTransforms(CI))
- return Result;
-
- Value *Src = CI.getOperand(0);
- const Type *SrcTy = Src->getType();
- const Type *DestTy = CI.getType();
- uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
- uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
- // See if we can simplify any instructions used by the LHS whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(CI))
- return &CI;
-
- // If the source isn't an instruction or has more than one use then we
- // can't do anything more.
- Instruction *SrcI = dyn_cast<Instruction>(Src);
- if (!SrcI || !Src->hasOneUse())
- return 0;
-
- // Attempt to propagate the cast into the instruction for int->int casts.
- int NumCastsRemoved = 0;
- // Only do this if the dest type is a simple type, don't convert the
- // expression tree to something weird like i93 unless the source is also
- // strange.
- if ((isa<VectorType>(DestTy) ||
- ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
- CanEvaluateInDifferentType(SrcI, DestTy,
- CI.getOpcode(), NumCastsRemoved)) {
- // If this cast is a truncate, evaluating in a different type always
- // eliminates the cast, so it is always a win. If this is a zero-extension,
- // we need to do an AND to maintain the clear top-part of the computation,
- // so we require that the input have eliminated at least one cast. If this
- // is a sign extension, we insert two new casts (to do the extension) so we
- // require that two casts have been eliminated.
- bool DoXForm = false;
- bool JustReplace = false;
- switch (CI.getOpcode()) {
- default:
- // All the others use floating point so we shouldn't actually
- // get here because of the check above.
- llvm_unreachable("Unknown cast type");
- case Instruction::Trunc:
- DoXForm = true;
- break;
- case Instruction::ZExt: {
- DoXForm = NumCastsRemoved >= 1;
-
- if (!DoXForm && 0) {
- // If it's unnecessary to issue an AND to clear the high bits, it's
- // always profitable to do this xform.
- Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, false); - APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); - if (MaskedValueIsZero(TryRes, Mask)) - return ReplaceInstUsesWith(CI, TryRes); - - if (Instruction *TryI = dyn_cast<Instruction>(TryRes)) - if (TryI->use_empty()) - EraseInstFromFunction(*TryI); - } - break; - } - case Instruction::SExt: { - DoXForm = NumCastsRemoved >= 2; - if (!DoXForm && !isa<TruncInst>(SrcI) && 0) { - // If we do not have to emit the truncate + sext pair, then it's always - // profitable to do this xform. - // - // It's not safe to eliminate the trunc + sext pair if one of the - // eliminated cast is a truncate. e.g. - // t2 = trunc i32 t1 to i16 - // t3 = sext i16 t2 to i32 - // != - // i32 t1 - Value *TryRes = EvaluateInDifferentType(SrcI, DestTy, true); - unsigned NumSignBits = ComputeNumSignBits(TryRes); - if (NumSignBits > (DestBitSize - SrcBitSize)) - return ReplaceInstUsesWith(CI, TryRes); - - if (Instruction *TryI = dyn_cast<Instruction>(TryRes)) - if (TryI->use_empty()) - EraseInstFromFunction(*TryI); - } - break; - } - } - - if (DoXForm) { - DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid cast: " << CI); - Value *Res = EvaluateInDifferentType(SrcI, DestTy, - CI.getOpcode() == Instruction::SExt); - if (JustReplace) - // Just replace this cast with the result. - return ReplaceInstUsesWith(CI, Res); - - assert(Res->getType() == DestTy); - switch (CI.getOpcode()) { - default: llvm_unreachable("Unknown cast type!"); - case Instruction::Trunc: - // Just replace this cast with the result. - return ReplaceInstUsesWith(CI, Res); - case Instruction::ZExt: { - assert(SrcBitSize < DestBitSize && "Not a zext?"); - - // If the high bits are already zero, just replace this cast with the - // result. - APInt Mask(APInt::getBitsSet(DestBitSize, SrcBitSize, DestBitSize)); - if (MaskedValueIsZero(Res, Mask)) - return ReplaceInstUsesWith(CI, Res); - - // We need to emit an AND to clear the high bits. - Constant *C = ConstantInt::get(*Context, - APInt::getLowBitsSet(DestBitSize, SrcBitSize)); - return BinaryOperator::CreateAnd(Res, C); - } - case Instruction::SExt: { - // If the high bits are already filled with sign bit, just replace this - // cast with the result. - unsigned NumSignBits = ComputeNumSignBits(Res); - if (NumSignBits > (DestBitSize - SrcBitSize)) - return ReplaceInstUsesWith(CI, Res); - - // We need to emit a cast to truncate, then a cast to sext. - return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy); - } - } - } - } - - Value *Op0 = SrcI->getNumOperands() > 0 ? SrcI->getOperand(0) : 0; - Value *Op1 = SrcI->getNumOperands() > 1 ? SrcI->getOperand(1) : 0; - - switch (SrcI->getOpcode()) { - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - // If we are discarding information, rewrite. - if (DestBitSize < SrcBitSize && DestBitSize != 1) { - // Don't insert two casts unless at least one can be eliminated. 
- if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || - !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { - Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); - Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); - return BinaryOperator::Create( - cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c); - } - } - - // cast (xor bool X, true) to int --> xor (cast bool X to int), 1 - if (isa<ZExtInst>(CI) && SrcBitSize == 1 && - SrcI->getOpcode() == Instruction::Xor && - Op1 == ConstantInt::getTrue(*Context) && - (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) { - Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName()); - return BinaryOperator::CreateXor(New, - ConstantInt::get(CI.getType(), 1)); - } - break; - - case Instruction::Shl: { - // Canonicalize trunc inside shl, if we can. - ConstantInt *CI = dyn_cast<ConstantInt>(Op1); - if (CI && DestBitSize < SrcBitSize && - CI->getLimitedValue(DestBitSize) < DestBitSize) { - Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); - Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); - return BinaryOperator::CreateShl(Op0c, Op1c); - } - break; - } - } - return 0; -} - -Instruction *InstCombiner::visitTrunc(TruncInst &CI) { - if (Instruction *Result = commonIntCastTransforms(CI)) - return Result; - - Value *Src = CI.getOperand(0); - const Type *Ty = CI.getType(); - uint32_t DestBitWidth = Ty->getScalarSizeInBits(); - uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits(); - - // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) - if (DestBitWidth == 1) { - Constant *One = ConstantInt::get(Src->getType(), 1); - Src = Builder->CreateAnd(Src, One, "tmp"); - Value *Zero = Constant::getNullValue(Src->getType()); - return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); - } - - // Optimize trunc(lshr(), c) to pull the shift through the truncate. - ConstantInt *ShAmtV = 0; - Value *ShiftOp = 0; - if (Src->hasOneUse() && - match(Src, m_LShr(m_Value(ShiftOp), m_ConstantInt(ShAmtV)))) { - uint32_t ShAmt = ShAmtV->getLimitedValue(SrcBitWidth); - - // Get a mask for the bits shifting in. - APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth)); - if (MaskedValueIsZero(ShiftOp, Mask)) { - if (ShAmt >= DestBitWidth) // All zeros. - return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty)); - - // Okay, we can shrink this. Truncate the input, then return a new - // shift. - Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName()); - Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty); - return BinaryOperator::CreateLShr(V1, V2); - } - } - - return 0; -} - -/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. -Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, - bool DoXform) { - // If we are just checking for a icmp eq of a single bit and zext'ing it - // to an integer, then shift the bit to the appropriate place and then - // cast to integer to avoid the comparison. - if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { - const APInt &Op1CV = Op1C->getValue(); - - // zext (x <s 0) to i32 --> x>>u31 true if signbit set. - // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. 
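// ---- [editor's aside; not part of the diff] ----
// Standalone check of the two signbit identities in the comment above, with
// the shift done on the unsigned bit pattern to stay well-defined:
//   zext(x <s 0)  == x >>u 31
//   zext(x >s -1) == (x >>u 31) ^ 1
#include <cassert>
#include <cstdint>
int main() {
  for (int32_t X : {0, 1, -1, 123, -123, INT32_MIN, INT32_MAX}) {
    uint32_t U = (uint32_t)X;
    assert((uint32_t)(X < 0) == (U >> 31));
    assert((uint32_t)(X > -1) == ((U >> 31) ^ 1u));
  }
}
// ---- [end aside] ----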
- if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
- (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
- if (!DoXform) return ICI;
-
- Value *In = ICI->getOperand(0);
- Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getScalarSizeInBits()-1);
- In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
- if (In->getType() != CI.getType())
- In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
-
- if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
- Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, In->getName()+".not");
- }
-
- return ReplaceInstUsesWith(CI, In);
- }
-
-
-
- // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
- // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- // zext (X == 1) to i32 --> X iff X has only the low bit set.
- // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
- // zext (X != 0) to i32 --> X iff X has only the low bit set.
- // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
- // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
- // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
- // This only works for EQ and NE
- ICI->isEquality()) {
- // If Op1C is some other power of two, convert:
- uint32_t BitWidth = Op1C->getType()->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
-
- APInt KnownZeroMask(~KnownZero);
- if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- if (!DoXform) return ICI;
-
- bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
- if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
- // (X&4) == 2 --> false
- // (X&4) != 2 --> true
- Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE);
- Res = ConstantExpr::getZExt(Res, CI.getType());
- return ReplaceInstUsesWith(CI, Res);
- }
-
- uint32_t ShiftAmt = KnownZeroMask.logBase2();
- Value *In = ICI->getOperand(0);
- if (ShiftAmt) {
- // Perform a logical shr by shiftamt.
- // Insert the shift to put the result in the low bit.
- In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
- In->getName()+".lobit");
- }
-
- if ((Op1CV != 0) == isNE) { // Toggle the low bit.
- Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder->CreateXor(In, One, "tmp");
- }
-
- if (CI.getType() == In->getType())
- return ReplaceInstUsesWith(CI, In);
- else
- return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
- }
- }
- }
-
- // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
- // It is also profitable to transform icmp eq into not(xor(A, B)) because that
- // may lead to additional simplifications.
- if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
- if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
- uint32_t BitWidth = ITy->getBitWidth();
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
-
- APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
- APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
-
- if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
- APInt KnownBits = KnownZeroLHS | KnownOneLHS;
- APInt UnknownBit = ~KnownBits;
- if (UnknownBit.countPopulation() == 1) {
- if (!DoXform) return ICI;
-
- Value *Result = Builder->CreateXor(LHS, RHS);
-
- // Mask off any bits that are set and won't be shifted away.
- if (KnownOneLHS.uge(UnknownBit))
- Result = Builder->CreateAnd(Result,
- ConstantInt::get(ITy, UnknownBit));
-
- // Shift the bit we're testing down to the lsb.
- Result = Builder->CreateLShr(
- Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
-
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
- Result->takeName(ICI);
- return ReplaceInstUsesWith(CI, Result);
- }
- }
- }
- }
-
- return 0;
-}
-
-Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
- // If one of the common conversions will work...
- if (Instruction *Result = commonIntCastTransforms(CI))
- return Result;
-
- Value *Src = CI.getOperand(0);
-
- // If this is a TRUNC followed by a ZEXT then we are dealing with integral
- // types and if the sizes are just right we can convert this into a logical
- // 'and' which will be much cheaper than the pair of casts.
- if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
- // Get the sizes of the types involved. We know that the intermediate type
- // will be smaller than A or C, but don't know the relation between A and C.
- Value *A = CSrc->getOperand(0);
- unsigned SrcSize = A->getType()->getScalarSizeInBits();
- unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
- unsigned DstSize = CI.getType()->getScalarSizeInBits();
- // If we're actually extending zero bits, then if
- // SrcSize < DstSize: zext(a & mask)
- // SrcSize == DstSize: a & mask
- // SrcSize > DstSize: trunc(a) & mask
- if (SrcSize < DstSize) {
- APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
- Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
- return new ZExtInst(And, CI.getType());
- }
-
- if (SrcSize == DstSize) {
- APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
- AndValue));
- }
- if (SrcSize > DstSize) {
- Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
- APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc,
- ConstantInt::get(Trunc->getType(),
- AndValue));
- }
- }
-
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
- return transformZExtICmp(ICI, CI);
-
- BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
- if (SrcI && SrcI->getOpcode() == Instruction::Or) {
- // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
- // of the (zext icmp) will be transformed.
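// ---- [editor's aside; not part of the diff] ----
// Standalone check of the trunc+zext masking rule used earlier in visitZExt
// above (the SrcSize == DstSize case): zext(trunc(a)) keeps exactly the low
// MidSize bits, so it equals a & ((1 << MidSize) - 1); here MidSize is 8.
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t A = 0; A < 0x20000; A += 7)
    assert((uint32_t)(uint8_t)A == (A & 0xFFu)); // trunc to i8, zext to i32
}
// ---- [end aside] ----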
- ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0)); - ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1)); - if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && - (transformZExtICmp(LHS, CI, false) || - transformZExtICmp(RHS, CI, false))) { - Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); - return BinaryOperator::Create(Instruction::Or, LCast, RCast); - } - } - - // zext(trunc(t) & C) -> (t & zext(C)). - if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) - if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) - if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) { - Value *TI0 = TI->getOperand(0); - if (TI0->getType() == CI.getType()) - return - BinaryOperator::CreateAnd(TI0, - ConstantExpr::getZExt(C, CI.getType())); - } - - // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). - if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) - if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) - if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0))) - if (And->getOpcode() == Instruction::And && And->hasOneUse() && - And->getOperand(1) == C) - if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { - Value *TI0 = TI->getOperand(0); - if (TI0->getType() == CI.getType()) { - Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); - return BinaryOperator::CreateXor(NewAnd, ZC); - } - } - - return 0; -} - -Instruction *InstCombiner::visitSExt(SExtInst &CI) { - if (Instruction *I = commonIntCastTransforms(CI)) - return I; - - Value *Src = CI.getOperand(0); - - // Canonicalize sign-extend from i1 to a select. - if (Src->getType() == Type::getInt1Ty(*Context)) - return SelectInst::Create(Src, - Constant::getAllOnesValue(CI.getType()), - Constant::getNullValue(CI.getType())); - - // See if the value being truncated is already sign extended. If so, just - // eliminate the trunc/sext pair. - if (Operator::getOpcode(Src) == Instruction::Trunc) { - Value *Op = cast<User>(Src)->getOperand(0); - unsigned OpBits = Op->getType()->getScalarSizeInBits(); - unsigned MidBits = Src->getType()->getScalarSizeInBits(); - unsigned DestBits = CI.getType()->getScalarSizeInBits(); - unsigned NumSignBits = ComputeNumSignBits(Op); - - if (OpBits == DestBits) { - // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign - // bits, it is already ready. - if (NumSignBits > DestBits-MidBits) - return ReplaceInstUsesWith(CI, Op); - } else if (OpBits < DestBits) { - // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign - // bits, just sext from i32. - if (NumSignBits > OpBits-MidBits) - return new SExtInst(Op, CI.getType(), "tmp"); - } else { - // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign - // bits, just truncate to i32. - if (NumSignBits > OpBits-MidBits) - return new TruncInst(Op, CI.getType(), "tmp"); - } - } - - // If the input is a shl/ashr pair of a same constant, then this is a sign - // extension from a smaller value. If we could trust arbitrary bitwidth - // integers, we could turn this into a truncate to the smaller bit and then - // use a sext for the whole extension. Since we don't, look deeper and check - // for a truncate. If the source and dest are the same type, eliminate the - // trunc and extend and just do shifts. 
For example, turn: - // %a = trunc i32 %i to i8 - // %b = shl i8 %a, 6 - // %c = ashr i8 %b, 6 - // %d = sext i8 %c to i32 - // into: - // %a = shl i32 %i, 30 - // %d = ashr i32 %a, 30 - Value *A = 0; - ConstantInt *BA = 0, *CA = 0; - if (match(Src, m_AShr(m_Shl(m_Value(A), m_ConstantInt(BA)), - m_ConstantInt(CA))) && - BA == CA && isa<TruncInst>(A)) { - Value *I = cast<TruncInst>(A)->getOperand(0); - if (I->getType() == CI.getType()) { - unsigned MidSize = Src->getType()->getScalarSizeInBits(); - unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); - unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; - Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - I = Builder->CreateShl(I, ShAmtV, CI.getName()); - return BinaryOperator::CreateAShr(I, ShAmtV); - } - } - - return 0; -} - -/// FitsInFPType - Return a Constant* for the specified FP constant if it fits -/// in the specified FP type without changing its value. -static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem, - LLVMContext *Context) { - bool losesInfo; - APFloat F = CFP->getValueAPF(); - (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); - if (!losesInfo) - return ConstantFP::get(*Context, F); - return 0; -} - -/// LookThroughFPExtensions - If this is an fp extension instruction, look -/// through it until we get the source value. -static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) { - if (Instruction *I = dyn_cast<Instruction>(V)) - if (I->getOpcode() == Instruction::FPExt) - return LookThroughFPExtensions(I->getOperand(0), Context); - - // If this value is a constant, return the constant in the smallest FP type - // that can accurately represent it. This allows us to turn - // (float)((double)X+2.0) into x+2.0f. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { - if (CFP->getType() == Type::getPPC_FP128Ty(*Context)) - return V; // No constant folding of this. - // See if the value can be truncated to float and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context)) - return V; - if (CFP->getType() == Type::getDoubleTy(*Context)) - return V; // Won't shrink. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context)) - return V; - // Don't try to shrink to various long double types. - } - - return V; -} - -Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { - if (Instruction *I = commonCastTransforms(CI)) - return I; - - // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are - // smaller than the destination type, we can eliminate the truncate by doing - // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well as - // many builtins (sqrt, etc). - BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0)); - if (OpI && OpI->hasOneUse()) { - switch (OpI->getOpcode()) { - default: break; - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - const Type *SrcTy = OpI->getType(); - Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0), Context); - Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1), Context); - if (LHSTrunc->getType() != SrcTy && - RHSTrunc->getType() != SrcTy) { - unsigned DstSize = CI.getType()->getScalarSizeInBits(); - // If the source types were both smaller than the destination type of - // the cast, do this xform. 
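For instance, a hypothetical fragment this rewrite targets (names invented;
both operands must really come from fpext of the smaller type):

    %xe = fpext float %x to double
    %ye = fpext float %y to double
    %s  = fadd double %xe, %ye
    %t  = fptrunc double %s to float
  -->
    %t  = fadd float %x, %y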
- if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && - RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { - LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); - RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); - return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); - } - } - break; - } - } - return 0; -} - -Instruction *InstCombiner::visitFPExt(CastInst &CI) { - return commonCastTransforms(CI); -} - -Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { - Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); - if (OpI == 0) - return commonCastTransforms(FI); - - // fptoui(uitofp(X)) --> X - // fptoui(sitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. - if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - - return commonCastTransforms(FI); -} - -Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { - Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); - if (OpI == 0) - return commonCastTransforms(FI); - - // fptosi(sitofp(X)) --> X - // fptosi(uitofp(X)) --> X - // This is safe if the intermediate type has enough bits in its mantissa to - // accurately represent all values of X. For example, do not do this with - // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. - if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && - OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getScalarSizeInBits() <= - OpI->getType()->getFPMantissaWidth()) - return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - - return commonCastTransforms(FI); -} - -Instruction *InstCombiner::visitUIToFP(CastInst &CI) { - return commonCastTransforms(CI); -} - -Instruction *InstCombiner::visitSIToFP(CastInst &CI) { - return commonCastTransforms(CI); -} - -Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { - // If the destination integer type is smaller than the intptr_t type for - // this target, do a ptrtoint to intptr_t then do a trunc. This allows the - // trunc to be exposed to other transforms. Don't do this for extending - // ptrtoint's, because we don't know if the target sign or zero extends its - // pointers. - if (TD && - CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); - return new TruncInst(P, CI.getType()); - } - - return commonPointerCastTransforms(CI); -} - -Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { - // If the source integer type is larger than the intptr_t type for - // this target, do a trunc to the intptr_t type, then inttoptr of it. This - // allows the trunc to be exposed to other transforms. Don't do this for - // extending inttoptr's, because we don't know if the target sign or zero - // extends to pointers. 
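As an illustration, on a hypothetical target where pointers are 32 bits wide:

    %p = inttoptr i64 %x to i8*
  -->
    %t = trunc i64 %x to i32
    %p = inttoptr i32 %t to i8*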
-  if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >
-      TD->getPointerSizeInBits()) {
-    Value *P = Builder->CreateTrunc(CI.getOperand(0),
-                                    TD->getIntPtrType(CI.getContext()), "tmp");
-    return new IntToPtrInst(P, CI.getType());
-  }
-
-  if (Instruction *I = commonCastTransforms(CI))
-    return I;
-
-  return 0;
-}
-
-Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
-  // If the operands are integer typed then apply the integer transforms,
-  // otherwise just apply the common ones.
-  Value *Src = CI.getOperand(0);
-  const Type *SrcTy = Src->getType();
-  const Type *DestTy = CI.getType();
-
-  if (isa<PointerType>(SrcTy)) {
-    if (Instruction *I = commonPointerCastTransforms(CI))
-      return I;
-  } else {
-    if (Instruction *Result = commonCastTransforms(CI))
-      return Result;
-  }
-
-
-  // Get rid of casts from one type to the same type. These are useless and can
-  // be replaced by the operand.
-  if (DestTy == Src->getType())
-    return ReplaceInstUsesWith(CI, Src);
-
-  if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
-    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
-    const Type *DstElTy = DstPTy->getElementType();
-    const Type *SrcElTy = SrcPTy->getElementType();
-
-    // If the address spaces don't match, don't eliminate the bitcast, which is
-    // required for changing types.
-    if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
-      return 0;
-
-    // If we are casting an alloca to a pointer to a type of the same
-    // size, rewrite the allocation instruction to allocate the "right" type.
-    // There is no need to modify malloc calls because it is their bitcast that
-    // needs to be cleaned up.
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
-      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
-        return V;
-
-    // If the source and destination are pointers, and this cast is equivalent
-    // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
-    // This can enhance SROA and other transforms that want type-safe pointers.
-    Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context));
-    unsigned NumZeros = 0;
-    while (SrcElTy != DstElTy &&
-           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
-           SrcElTy->getNumContainedTypes() /* not "{}" */) {
-      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
-      ++NumZeros;
-    }
-
-    // If we found a path from the src to dest, create the getelementptr now.
-    if (SrcElTy == DstElTy) {
-      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "",
-                                               ((Instruction*) NULL));
-    }
-  }
-
-  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
-    if (DestVTy->getNumElements() == 1) {
-      if (!isa<VectorType>(SrcTy)) {
-        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
-        return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
-                            Constant::getNullValue(Type::getInt32Ty(*Context)));
-      }
-      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
-    }
-  }
-
-  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
-    if (SrcVTy->getNumElements() == 1) {
-      if (!isa<VectorType>(DestTy)) {
-        Value *Elem =
-          Builder->CreateExtractElement(Src,
-                            Constant::getNullValue(Type::getInt32Ty(*Context)));
-        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
-      }
-    }
-  }
-
-  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
-    if (SVI->hasOneUse()) {
-      // Okay, we have (bitconvert (shuffle ..)).
Check to see if this is - // a bitconvert to a vector with the same # elts. - if (isa<VectorType>(DestTy) && - cast<VectorType>(DestTy)->getNumElements() == - SVI->getType()->getNumElements() && - SVI->getType()->getNumElements() == - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) { - CastInst *Tmp; - // If either of the operands is a cast from CI.getType(), then - // evaluating the shuffle in the casted destination's type will allow - // us to eliminate at least one cast. - if (((Tmp = dyn_cast<CastInst>(SVI->getOperand(0))) && - Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) && - Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); - // Return a new shuffle vector. Use the same element ID's, as we - // know the vector types match #elts. - return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); - } - } - } - } - return 0; -} - -/// GetSelectFoldableOperands - We want to turn code that looks like this: -/// %C = or %A, %B -/// %D = select %cond, %C, %A -/// into: -/// %C = select %cond, %B, 0 -/// %D = or %A, %C -/// -/// Assuming that the specified instruction is an operand to the select, return -/// a bitmask indicating which operands of this instruction are foldable if they -/// equal the other incoming value of the select. -/// -static unsigned GetSelectFoldableOperands(Instruction *I) { - switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - return 3; // Can fold through either operand. - case Instruction::Sub: // Can only fold on the amount subtracted. - case Instruction::Shl: // Can only fold on the shift amount. - case Instruction::LShr: - case Instruction::AShr: - return 1; - default: - return 0; // Cannot fold - } -} - -/// GetSelectFoldableConstant - For the same transformation as the previous -/// function, return the identity constant that goes into the select. -static Constant *GetSelectFoldableConstant(Instruction *I, - LLVMContext *Context) { - switch (I->getOpcode()) { - default: llvm_unreachable("This cannot happen!"); - case Instruction::Add: - case Instruction::Sub: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - return Constant::getNullValue(I->getType()); - case Instruction::And: - return Constant::getAllOnesValue(I->getType()); - case Instruction::Mul: - return ConstantInt::get(I->getType(), 1); - } -} - -/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI -/// have the same opcode and only one use each. Try to simplify this. -Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI) { - if (TI->getNumOperands() == 1) { - // If this is a non-volatile load or a cast from the same type, - // merge. - if (TI->isCast()) { - if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) - return 0; - } else { - return 0; // unknown unary op. - } - - // Fold this by inserting a select from the input values. - SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0), SI.getName()+".v"); - InsertNewInstBefore(NewSI, SI); - return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, - TI->getType()); - } - - // Only handle binary operators here. 
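The binary-operator path below aims at shapes like this invented example,
where both arms share the operand %x:

    %t = add i32 %x, %y
    %f = add i32 %x, %z
    %r = select i1 %c, i32 %t, i32 %f
  -->
    %s = select i1 %c, i32 %y, i32 %z
    %r = add i32 %x, %s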
-  if (!isa<BinaryOperator>(TI))
-    return 0;
-
-  // Figure out if the operations have any operands in common.
-  Value *MatchOp, *OtherOpT, *OtherOpF;
-  bool MatchIsOpZero;
-  if (TI->getOperand(0) == FI->getOperand(0)) {
-    MatchOp  = TI->getOperand(0);
-    OtherOpT = TI->getOperand(1);
-    OtherOpF = FI->getOperand(1);
-    MatchIsOpZero = true;
-  } else if (TI->getOperand(1) == FI->getOperand(1)) {
-    MatchOp  = TI->getOperand(1);
-    OtherOpT = TI->getOperand(0);
-    OtherOpF = FI->getOperand(0);
-    MatchIsOpZero = false;
-  } else if (!TI->isCommutative()) {
-    return 0;
-  } else if (TI->getOperand(0) == FI->getOperand(1)) {
-    MatchOp  = TI->getOperand(0);
-    OtherOpT = TI->getOperand(1);
-    OtherOpF = FI->getOperand(0);
-    MatchIsOpZero = true;
-  } else if (TI->getOperand(1) == FI->getOperand(0)) {
-    MatchOp  = TI->getOperand(1);
-    OtherOpT = TI->getOperand(0);
-    OtherOpF = FI->getOperand(1);
-    MatchIsOpZero = true;
-  } else {
-    return 0;
-  }
-
-  // If we reach here, they do have operations in common.
-  SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
-                                         OtherOpF, SI.getName()+".v");
-  InsertNewInstBefore(NewSI, SI);
-
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
-    if (MatchIsOpZero)
-      return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
-    else
-      return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
-  }
-  llvm_unreachable("Shouldn't get here");
-  return 0;
-}
-
-static bool isSelect01(Constant *C1, Constant *C2) {
-  ConstantInt *C1I = dyn_cast<ConstantInt>(C1);
-  if (!C1I)
-    return false;
-  ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
-  if (!C2I)
-    return false;
-  return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
-}
-
-/// FoldSelectIntoOp - Try to fold the select into one of the operands to
-/// facilitate further optimization.
-Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
-                                            Value *FalseVal) {
-  // See the comment above GetSelectFoldableOperands for a description of the
-  // transformation we are doing here.
-  if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
-    if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
-        !isa<Constant>(FalseVal)) {
-      if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
-        unsigned OpToFold = 0;
-        if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
-          OpToFold = 1;
-        } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
-          OpToFold = 2;
-        }
-
-        if (OpToFold) {
-          Constant *C = GetSelectFoldableConstant(TVI, Context);
-          Value *OOp = TVI->getOperand(2-OpToFold);
-          // Avoid creating select between 2 constants unless it's selecting
-          // between 0 and 1.
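A concrete case the 0/1 guard admits (names invented; 0 is the identity
constant chosen for add):

    %t = add i32 %x, 1
    %r = select i1 %c, i32 %t, i32 %x
  -->
    %s = select i1 %c, i32 1, i32 0
    %r = add i32 %x, %s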
- if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); - InsertNewInstBefore(NewSel, SI); - NewSel->takeName(TVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) - return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - llvm_unreachable("Unknown instruction!!"); - } - } - } - } - } - - if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) { - if (FVI->hasOneUse() && FVI->getNumOperands() == 2 && - !isa<Constant>(TrueVal)) { - if (unsigned SFO = GetSelectFoldableOperands(FVI)) { - unsigned OpToFold = 0; - if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { - OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { - OpToFold = 2; - } - - if (OpToFold) { - Constant *C = GetSelectFoldableConstant(FVI, Context); - Value *OOp = FVI->getOperand(2-OpToFold); - // Avoid creating select between 2 constants unless it's selecting - // between 0 and 1. - if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); - InsertNewInstBefore(NewSel, SI); - NewSel->takeName(FVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) - return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - llvm_unreachable("Unknown instruction!!"); - } - } - } - } - } - - return 0; -} - -/// visitSelectInstWithICmp - Visit a SelectInst that has an -/// ICmpInst as its first operand. -/// -Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, - ICmpInst *ICI) { - bool Changed = false; - ICmpInst::Predicate Pred = ICI->getPredicate(); - Value *CmpLHS = ICI->getOperand(0); - Value *CmpRHS = ICI->getOperand(1); - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); - - // Check cases where the comparison is with a constant that - // can be adjusted to fit the min/max idiom. We may edit ICI in - // place here, so make sure the select is the only user. - if (ICI->hasOneUse()) - if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) { - switch (Pred) { - default: break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: { - // X < MIN ? T : F --> F - if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X < C ? X : C-1 --> X > C-1 ? C-1 : X - Constant *AdjustedRHS = SubOne(CI); - if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } - break; - } - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: { - // X > MAX ? T : F --> F - if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) - return ReplaceInstUsesWith(SI, FalseVal); - // X > C ? X : C+1 --> X < C+1 ? C+1 : X - Constant *AdjustedRHS = AddOne(CI); - if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || - (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { - Pred = ICmpInst::getSwappedPredicate(Pred); - CmpRHS = AdjustedRHS; - std::swap(FalseVal, TrueVal); - ICI->setPredicate(Pred); - ICI->setOperand(1, CmpRHS); - SI.setOperand(1, TrueVal); - SI.setOperand(2, FalseVal); - Changed = true; - } - break; - } - } - - // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed - // (x >s -1) ? 
-1 : 0 -> ashr x, 31 -> all ones if not signed - CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; - if (match(TrueVal, m_ConstantInt<-1>()) && - match(FalseVal, m_ConstantInt<0>())) - Pred = ICI->getPredicate(); - else if (match(TrueVal, m_ConstantInt<0>()) && - match(FalseVal, m_ConstantInt<-1>())) - Pred = CmpInst::getInversePredicate(ICI->getPredicate()); - - if (Pred != CmpInst::BAD_ICMP_PREDICATE) { - // If we are just checking for a icmp eq of a single bit and zext'ing it - // to an integer, then shift the bit to the appropriate place and then - // cast to integer to avoid the comparison. - const APInt &Op1CV = CI->getValue(); - - // sext (x <s 0) to i32 --> x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. - if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || - (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { - Value *In = ICI->getOperand(0); - Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getScalarSizeInBits()-1); - In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, - In->getName()+".lobit"), - *ICI); - if (In->getType() != SI.getType()) - In = CastInst::CreateIntegerCast(In, SI.getType(), - true/*SExt*/, "tmp", ICI); - - if (Pred == ICmpInst::ICMP_SGT) - In = InsertNewInstBefore(BinaryOperator::CreateNot(In, - In->getName()+".not"), *ICI); - - return ReplaceInstUsesWith(SI, In); - } - } - } - - if (CmpLHS == TrueVal && CmpRHS == FalseVal) { - // Transform (X == Y) ? X : Y -> Y - if (Pred == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? X : Y -> X - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX - - } else if (CmpLHS == FalseVal && CmpRHS == TrueVal) { - // Transform (X == Y) ? Y : X -> X - if (Pred == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(SI, FalseVal); - // Transform (X != Y) ? Y : X -> Y - if (Pred == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(SI, TrueVal); - /// NOTE: if we wanted to, this is where to detect integer MIN/MAX - } - return Changed ? &SI : 0; -} - - -/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a -/// PHI node (but the two may be in different blocks). See if the true/false -/// values (V) are live in all of the predecessor blocks of the PHI. For -/// example, cases like this cannot be mapped: -/// -/// X = phi [ C1, BB1], [C2, BB2] -/// Y = add -/// Z = select X, Y, 0 -/// -/// because Y is not live in BB1/BB2. -/// -static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, - const SelectInst &SI) { - // If the value is a non-instruction value like a constant or argument, it - // can always be mapped. - const Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return true; - - // If V is a PHI node defined in the same block as the condition PHI, we can - // map the arguments. - const PHINode *CondPHI = cast<PHINode>(SI.getCondition()); - - if (const PHINode *VP = dyn_cast<PHINode>(I)) - if (VP->getParent() == CondPHI->getParent()) - return true; - - // Otherwise, if the PHI and select are defined in the same block and if V is - // defined in a different block, then we can transform it. - if (SI.getParent() == CondPHI->getParent() && - I->getParent() != CondPHI->getParent()) - return true; - - // Otherwise we have a 'hard' case and we can't tell without doing more - // detailed dominator based analysis, punt. - return false; -} - -/// FoldSPFofSPF - We have an SPF (e.g. 
a min or max) of an SPF of the form: -/// SPF2(SPF1(A, B), C) -Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, - SelectPatternFlavor SPF1, - Value *A, Value *B, - Instruction &Outer, - SelectPatternFlavor SPF2, Value *C) { - if (C == A || C == B) { - // MAX(MAX(A, B), B) -> MAX(A, B) - // MIN(MIN(a, b), a) -> MIN(a, b) - if (SPF1 == SPF2) - return ReplaceInstUsesWith(Outer, Inner); - - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || - (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || - (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || - (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) - return ReplaceInstUsesWith(Outer, C); - } - - // TODO: MIN(MIN(A, 23), 97) - return 0; -} - - - - -Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { - Value *CondVal = SI.getCondition(); - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); - - // select true, X, Y -> X - // select false, X, Y -> Y - if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal)) - return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal); - - // select C, X, X -> X - if (TrueVal == FalseVal) - return ReplaceInstUsesWith(SI, TrueVal); - - if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X - return ReplaceInstUsesWith(SI, FalseVal); - if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X - return ReplaceInstUsesWith(SI, TrueVal); - if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y - if (isa<Constant>(TrueVal)) - return ReplaceInstUsesWith(SI, TrueVal); - else - return ReplaceInstUsesWith(SI, FalseVal); - } - - if (SI.getType() == Type::getInt1Ty(*Context)) { - if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { - if (C->getZExtValue()) { - // Change: A = select B, true, C --> A = or B, C - return BinaryOperator::CreateOr(CondVal, FalseVal); - } else { - // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateAnd(NotCond, FalseVal); - } - } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { - if (C->getZExtValue() == false) { - // Change: A = select B, C, false --> A = and B, C - return BinaryOperator::CreateAnd(CondVal, TrueVal); - } else { - // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateOr(NotCond, TrueVal); - } - } - - // select a, b, a -> a&b - // select a, a, b -> a|b - if (CondVal == TrueVal) - return BinaryOperator::CreateOr(CondVal, FalseVal); - else if (CondVal == FalseVal) - return BinaryOperator::CreateAnd(CondVal, TrueVal); - } - - // Selecting between two integer constants? 
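The first two folds below, sketched on invented values:

    %r = select i1 %c, i32 1, i32 0    -->  %r = zext i1 %c to i32
    %r = select i1 %c, i32 0, i32 1    -->  %n = xor i1 %c, true
                                            %r = zext i1 %n to i32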
- if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal)) - if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) { - // select C, 1, 0 -> zext C to int - if (FalseValC->isZero() && TrueValC->getValue() == 1) { - return CastInst::Create(Instruction::ZExt, CondVal, SI.getType()); - } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { - // select C, 0, 1 -> zext !C to int - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return CastInst::Create(Instruction::ZExt, NotCond, SI.getType()); - } - - if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { - // If one of the constants is zero (we know they can't both be) and we - // have an icmp instruction with zero, and we have an 'and' with the - // non-constant value, eliminate this whole mess. This corresponds to - // cases like this: ((X & 27) ? 27 : 0) - if (TrueValC->isZero() || FalseValC->isZero()) - if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) && - cast<Constant>(IC->getOperand(1))->isNullValue()) - if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0))) - if (ICA->getOpcode() == Instruction::And && - isa<ConstantInt>(ICA->getOperand(1)) && - (ICA->getOperand(1) == TrueValC || - ICA->getOperand(1) == FalseValC) && - isOneBitSet(cast<ConstantInt>(ICA->getOperand(1)))) { - // Okay, now we know that everything is set up, we just don't - // know whether we have a icmp_ne or icmp_eq and whether the - // true or false val is the zero. - bool ShouldNotVal = !TrueValC->isZero(); - ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; - Value *V = ICA; - if (ShouldNotVal) - V = InsertNewInstBefore(BinaryOperator::Create( - Instruction::Xor, V, ICA->getOperand(1)), SI); - return ReplaceInstUsesWith(SI, V); - } - } - } - - // See if we are selecting two values based on a comparison of the two values. - if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) { - if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) { - // Transform (X == Y) ? X : Y -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: - // consider X== -0, Y== +0. - // It becomes safe if either operand is a nonzero constant. - ConstantFP *CFPt, *CFPf; - if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && - !CFPt->getValueAPF().isZero()) || - ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && - !CFPf->getValueAPF().isZero())) - return ReplaceInstUsesWith(SI, FalseVal); - } - // Transform (X != Y) ? X : Y -> X - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); - // NOTE: if we wanted to, this is where to detect MIN/MAX - - } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ - // Transform (X == Y) ? Y : X -> X - if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) { - // This is not safe in general for floating point: - // consider X== -0, Y== +0. - // It becomes safe if either operand is a nonzero constant. - ConstantFP *CFPt, *CFPf; - if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && - !CFPt->getValueAPF().isZero()) || - ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && - !CFPf->getValueAPF().isZero())) - return ReplaceInstUsesWith(SI, FalseVal); - } - // Transform (X != Y) ? 
Y : X -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); - // NOTE: if we wanted to, this is where to detect MIN/MAX - } - // NOTE: if we wanted to, this is where to detect ABS - } - - // See if we are selecting two values based on a comparison of the two values. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal)) - if (Instruction *Result = visitSelectInstWithICmp(SI, ICI)) - return Result; - - if (Instruction *TI = dyn_cast<Instruction>(TrueVal)) - if (Instruction *FI = dyn_cast<Instruction>(FalseVal)) - if (TI->hasOneUse() && FI->hasOneUse()) { - Instruction *AddOp = 0, *SubOp = 0; - - // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) - if (TI->getOpcode() == FI->getOpcode()) - if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) - return IV; - - // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is - // even legal for FP. - if ((TI->getOpcode() == Instruction::Sub && - FI->getOpcode() == Instruction::Add) || - (TI->getOpcode() == Instruction::FSub && - FI->getOpcode() == Instruction::FAdd)) { - AddOp = FI; SubOp = TI; - } else if ((FI->getOpcode() == Instruction::Sub && - TI->getOpcode() == Instruction::Add) || - (FI->getOpcode() == Instruction::FSub && - TI->getOpcode() == Instruction::FAdd)) { - AddOp = TI; SubOp = FI; - } - - if (AddOp) { - Value *OtherAddOp = 0; - if (SubOp->getOperand(0) == AddOp->getOperand(0)) { - OtherAddOp = AddOp->getOperand(1); - } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { - OtherAddOp = AddOp->getOperand(0); - } - - if (OtherAddOp) { - // So at this point we know we have (Y -> OtherAddOp): - // select C, (add X, Y), (sub X, Z) - Value *NegVal; // Compute -Z - if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { - NegVal = ConstantExpr::getNeg(C); - } else { - NegVal = InsertNewInstBefore( - BinaryOperator::CreateNeg(SubOp->getOperand(1), - "tmp"), SI); - } - - Value *NewTrueOp = OtherAddOp; - Value *NewFalseOp = NegVal; - if (AddOp != TI) - std::swap(NewTrueOp, NewFalseOp); - Instruction *NewSel = - SelectInst::Create(CondVal, NewTrueOp, - NewFalseOp, SI.getName() + ".p"); - - NewSel = InsertNewInstBefore(NewSel, SI); - return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); - } - } - } - - // See if we can fold the select into one of our operands. - if (SI.getType()->isInteger()) { - if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) - return FoldI; - - // MAX(MAX(a, b), a) -> MAX(a, b) - // MIN(MIN(a, b), a) -> MIN(a, b) - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - Value *LHS, *RHS, *LHS2, *RHS2; - if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { - if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, - SI, SPF, RHS)) - return R; - if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2, - SI, SPF, LHS)) - return R; - } - - // TODO. - // ABS(-X) -> ABS(X) - // ABS(ABS(X)) -> ABS(X) - } - - // See if we can fold the select into a phi node if the condition is a select. - if (isa<PHINode>(SI.getCondition())) - // The true/false values have to be live in the PHI predecessor's blocks. 
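A simple case that does map (blocks invented): constants are live in every
predecessor, so the select can be folded edge-by-edge into the phi:

    %c = phi i1 [ true, %bb1 ], [ false, %bb2 ]
    %r = select i1 %c, i32 7, i32 9
  -->
    %r = phi i32 [ 7, %bb1 ], [ 9, %bb2 ]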
- if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && - CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) - if (Instruction *NV = FoldOpIntoPhi(SI)) - return NV; - - if (BinaryOperator::isNot(CondVal)) { - SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); - SI.setOperand(1, FalseVal); - SI.setOperand(2, TrueVal); - return &SI; - } - - return 0; -} - -/// EnforceKnownAlignment - If the specified pointer points to an object that -/// we control, modify the object's alignment to PrefAlign. This isn't -/// often possible though. If alignment is important, a more reliable approach -/// is to simply align all global variables and allocation instructions to -/// their preferred alignment from the beginning. -/// -static unsigned EnforceKnownAlignment(Value *V, - unsigned Align, unsigned PrefAlign) { - - User *U = dyn_cast<User>(V); - if (!U) return Align; - - switch (Operator::getOpcode(U)) { - default: break; - case Instruction::BitCast: - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - case Instruction::GetElementPtr: { - // If all indexes are zero, it is just the alignment of the base pointer. - bool AllZeroOperands = true; - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i) - if (!isa<Constant>(*i) || - !cast<Constant>(*i)->isNullValue()) { - AllZeroOperands = false; - break; - } - - if (AllZeroOperands) { - // Treat this like a bitcast. - return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); - } - break; - } - } - - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // If there is a large requested alignment and we can, bump up the alignment - // of the global. - if (!GV->isDeclaration()) { - if (GV->getAlignment() >= PrefAlign) - Align = GV->getAlignment(); - else { - GV->setAlignment(PrefAlign); - Align = PrefAlign; - } - } - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - // If there is a requested alignment and if this is an alloca, round up. - if (AI->getAlignment() >= PrefAlign) - Align = AI->getAlignment(); - else { - AI->setAlignment(PrefAlign); - Align = PrefAlign; - } - } - - return Align; -} - -/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that -/// we can determine, return it, otherwise return 0. If PrefAlign is specified, -/// and it is more than the alignment of the ultimate object, see if we can -/// increase the alignment of the ultimate object, making this check succeed. -unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, - unsigned PrefAlign) { - unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : - sizeof(PrefAlign) * CHAR_BIT; - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne); - unsigned TrailZ = KnownZero.countTrailingOnes(); - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); - - if (PrefAlign > Align) - Align = EnforceKnownAlignment(V, Align, PrefAlign); - - // We don't need to make any adjustment. - return Align; -} - -Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); - unsigned MinAlign = std::min(DstAlign, SrcAlign); - unsigned CopyAlign = MI->getAlignment(); - - if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - MinAlign, false)); - return MI; - } - - // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with - // load/store. 
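For example (invented operands; the era's four-operand memcpy signature with
a trailing alignment argument is assumed):

    call void @llvm.memcpy.i32(i8* %d, i8* %s, i32 4, i32 4)
  -->
    %sp = bitcast i8* %s to i32*
    %dp = bitcast i8* %d to i32*
    %v  = load i32* %sp, align 4
    store i32 %v, i32* %dp, align 4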
- ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3)); - if (MemOpLength == 0) return 0; - - // Source and destination pointer types are always "i8*" for intrinsic. See - // if the size is something we can handle with a single primitive load/store. - // A single load+store correctly handles overlapping memory in the memmove - // case. - unsigned Size = MemOpLength->getZExtValue(); - if (Size == 0) return MI; // Delete this mem transfer. - - if (Size > 8 || (Size&(Size-1))) - return 0; // If not 1/2/4/8 bytes, exit. - - // Use an integer load+store unless we can find something better. - Type *NewPtrTy = - PointerType::getUnqual(IntegerType::get(*Context, Size<<3)); - - // Memcpy forces the use of i8* for the source and destination. That means - // that if you're using memcpy to move one double around, you'll get a cast - // from double* to i8*. We'd much rather use a double load+store rather than - // an i64 load+store, here because this improves the odds that the source or - // dest address will be promotable. See if we can find a better type than the - // integer datatype. - if (Value *Op = getBitCastOperand(MI->getOperand(1))) { - const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType(); - if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { - // The SrcETy might be something like {{{double}}} or [1 x double]. Rip - // down through these levels if so. - while (!SrcETy->isSingleValueType()) { - if (const StructType *STy = dyn_cast<StructType>(SrcETy)) { - if (STy->getNumElements() == 1) - SrcETy = STy->getElementType(0); - else - break; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { - if (ATy->getNumElements() == 1) - SrcETy = ATy->getElementType(); - else - break; - } else - break; - } - - if (SrcETy->isSingleValueType()) - NewPtrTy = PointerType::getUnqual(SrcETy); - } - } - - - // If the memcpy/memmove provides better alignment info than we can - // infer, use it. - SrcAlign = std::max(SrcAlign, CopyAlign); - DstAlign = std::max(DstAlign, CopyAlign); - - Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); - Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); - InsertNewInstBefore(L, *MI); - InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); - - // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); - return MI; -} - -Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); - if (MI->getAlignment() < Alignment) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - Alignment, false)); - return MI; - } - - // Extract the length and alignment and fill if they are constant. - ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); - ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); - if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context)) - return 0; - uint64_t Len = LenC->getZExtValue(); - Alignment = MI->getAlignment(); - - // If the length is zero, this is a no-op - if (Len == 0) return MI; // memset(d,c,0,a) -> noop - - // memset(s,c,n) -> store s, c (for n=1,2,4,8) - if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = IntegerType::get(*Context, Len*8); // n=1 -> i8. 
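The multiply by 0x0101010101010101 below replicates the fill byte across the
stored word; e.g. for an invented 4-byte memset of 0xAB:

    call void @llvm.memset.i32(i8* %p, i8 -85, i32 4, i32 4)    ; -85 == 0xAB
  -->
    %pp = bitcast i8* %p to i32*
    store i32 -1414812757, i32* %pp, align 4                    ; 0xABABABAB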
- - Value *Dest = MI->getDest(); - Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); - - // Alignment 0 is identity for alignment 1 for memset, but not store. - if (Alignment == 0) Alignment = 1; - - // Extract the fill value and store. - uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), - Dest, false, Alignment), *MI); - - // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setLength(Constant::getNullValue(LenC->getType())); - return MI; - } - - return 0; -} - - -/// visitCallInst - CallInst simplification. This mostly only handles folding -/// of intrinsic instructions. For normal calls, it allows visitCallSite to do -/// the heavy lifting. -/// -Instruction *InstCombiner::visitCallInst(CallInst &CI) { - if (isFreeCall(&CI)) - return visitFree(CI); - - // If the caller function is nounwind, mark the call as nounwind, even if the - // callee isn't. - if (CI.getParent()->getParent()->doesNotThrow() && - !CI.doesNotThrow()) { - CI.setDoesNotThrow(); - return &CI; - } - - IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); - if (!II) return visitCallSite(&CI); - - // Intrinsics cannot occur in an invoke, so handle them here instead of in - // visitCallSite. - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) { - bool Changed = false; - - // memmove/cpy/set of zero bytes is a noop. - if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) { - if (NumBytes->isNullValue()) return EraseInstFromFunction(CI); - - if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) - if (CI->getZExtValue() == 1) { - // Replace the instruction with just byte operations. We would - // transform other cases to loads/stores, but we don't know if - // alignment is sufficient. - } - } - - // If we have a memmove and the source operation is a constant global, - // then the source and dest pointers can't alias, so we can change this - // into a call to memcpy. - if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) { - if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource())) - if (GVSrc->isConstant()) { - Module *M = CI.getParent()->getParent()->getParent(); - Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[1]; - Tys[0] = CI.getOperand(3)->getType(); - CI.setOperand(0, - Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); - Changed = true; - } - } - - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { - // memmove(x,x,size) -> noop. - if (MTI->getSource() == MTI->getDest()) - return EraseInstFromFunction(CI); - } - - // If we can determine a pointer alignment that is bigger than currently - // set, update the alignment. 
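A hypothetical instance of that alignment update: a transfer declared with
align 1 whose two pointers are each provably 16-byte aligned gets its
alignment operand raised before anything else:

    call void @llvm.memcpy.i32(i8* %d, i8* %s, i32 64, i32 1)
  -->
    call void @llvm.memcpy.i32(i8* %d, i8* %s, i32 64, i32 16)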
- if (isa<MemTransferInst>(MI)) { - if (Instruction *I = SimplifyMemTransfer(MI)) - return I; - } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { - if (Instruction *I = SimplifyMemSet(MSI)) - return I; - } - - if (Changed) return II; - } - - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap: - // bswap(bswap(x)) -> x - if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) - if (Operand->getIntrinsicID() == Intrinsic::bswap) - return ReplaceInstUsesWith(CI, Operand->getOperand(1)); - break; - case Intrinsic::powi: - if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); - // powi(x, 1) -> x - if (Power->isOne()) - return ReplaceInstUsesWith(CI, II->getOperand(1)); - // powi(x, -1) -> 1/x - if (Power->isAllOnesValue()) - return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), - II->getOperand(1)); - } - break; - - case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); - const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); - uint32_t BitWidth = IT->getBitWidth(); - APInt Mask = APInt::getSignBit(BitWidth); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); - bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; - bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; - - if (LHSKnownNegative || LHSKnownPositive) { - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); - bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; - bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; - if (LHSKnownNegative && RHSKnownNegative) { - // The sign bit is set in both cases: this MUST overflow. - // Create a simple add instruction, and insert it into the struct. - Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); - Worklist.Add(Add); - Constant *V[] = { - UndefValue::get(LHS->getType()), ConstantInt::getTrue(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, Add, 0); - } - - if (LHSKnownPositive && RHSKnownPositive) { - // The sign bit is clear in both cases: this CANNOT overflow. - // Create a simple add instruction, and insert it into the struct. - Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); - Worklist.Add(Add); - Constant *V[] = { - UndefValue::get(LHS->getType()), ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, Add, 0); - } - } - } - // FALL THROUGH uadd into sadd - case Intrinsic::sadd_with_overflow: - // Canonicalize constants into the RHS. 
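Sketch of that canonicalization on an invented call:

    %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 7, i32 %x)
  -->
    %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 7)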
- if (isa<Constant>(II->getOperand(1)) && - !isa<Constant>(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); - return II; - } - - // X + undef -> undef - if (isa<UndefValue>(II->getOperand(2))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { - // X + 0 -> {X, false} - if (RHS->isZero()) { - Constant *V[] = { - UndefValue::get(II->getOperand(0)->getType()), - ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); - } - } - break; - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: - // undef - X -> undef - // X - undef -> undef - if (isa<UndefValue>(II->getOperand(1)) || - isa<UndefValue>(II->getOperand(2))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) { - // X - 0 -> {X, false} - if (RHS->isZero()) { - Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), - ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); - } - } - break; - case Intrinsic::umul_with_overflow: - case Intrinsic::smul_with_overflow: - // Canonicalize constants into the RHS. - if (isa<Constant>(II->getOperand(1)) && - !isa<Constant>(II->getOperand(2))) { - Value *LHS = II->getOperand(1); - II->setOperand(1, II->getOperand(2)); - II->setOperand(2, LHS); - return II; - } - - // X * undef -> undef - if (isa<UndefValue>(II->getOperand(2))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) { - // X*0 -> {0, false} - if (RHSI->isZero()) - return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); - - // X * 1 -> {X, false} - if (RHSI->equalsInt(1)) { - Constant *V[] = { - UndefValue::get(II->getOperand(1)->getType()), - ConstantInt::getFalse(*Context) - }; - Constant *Struct = ConstantStruct::get(*Context, V, 2, false); - return InsertValueInst::Create(Struct, II->getOperand(1), 0); - } - } - break; - case Intrinsic::ppc_altivec_lvx: - case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::x86_sse_loadu_ps: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse2_loadu_dq: - // Turn PPC lvx -> load if the pointer is known aligned. - // Turn X86 loadups -> load if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), - PointerType::getUnqual(II->getType())); - return new LoadInst(Ptr); - } - break; - case Intrinsic::ppc_altivec_stvx: - case Intrinsic::ppc_altivec_stvxl: - // Turn stvx -> store if the pointer is known aligned. - if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { - const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(1)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); - return new StoreInst(II->getOperand(1), Ptr); - } - break; - case Intrinsic::x86_sse_storeu_ps: - case Intrinsic::x86_sse2_storeu_pd: - case Intrinsic::x86_sse2_storeu_dq: - // Turn X86 storeu -> store if the pointer is known aligned. 
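For example (invented %p, assumed provably 16-byte aligned):

    call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %v)
  -->
    %q = bitcast i8* %p to <4 x float>*
    store <4 x float> %v, <4 x float>* %q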
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - const Type *OpPtrTy = - PointerType::getUnqual(II->getOperand(2)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); - return new StoreInst(II->getOperand(2), Ptr); - } - break; - - case Intrinsic::x86_sse_cvttss2si: { - // These intrinsics only demands the 0th element of its input vector. If - // we can simplify the input based on that, do so now. - unsigned VWidth = - cast<VectorType>(II->getOperand(1)->getType())->getNumElements(); - APInt DemandedElts(VWidth, 1); - APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts, - UndefElts)) { - II->setOperand(1, V); - return II; - } - break; - } - - case Intrinsic::ppc_altivec_vperm: - // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) { - assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); - - // Check that all of the elements are integer constants or undefs. - bool AllEltsOk = true; - for (unsigned i = 0; i != 16; ++i) { - if (!isa<ConstantInt>(Mask->getOperand(i)) && - !isa<UndefValue>(Mask->getOperand(i))) { - AllEltsOk = false; - break; - } - } - - if (AllEltsOk) { - // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); - Value *Result = UndefValue::get(Op0->getType()); - - // Only extract each element once. - Value *ExtractedElts[32]; - memset(ExtractedElts, 0, sizeof(ExtractedElts)); - - for (unsigned i = 0; i != 16; ++i) { - if (isa<UndefValue>(Mask->getOperand(i))) - continue; - unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); - Idx &= 31; // Match the hardware behavior. - - if (ExtractedElts[Idx] == 0) { - ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, - ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false), - "tmp"); - } - - // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - ConstantInt::get(Type::getInt32Ty(*Context), i, false), - "tmp"); - } - return CastInst::Create(Instruction::BitCast, Result, CI.getType()); - } - } - break; - - case Intrinsic::stackrestore: { - // If the save is right next to the restore, remove the restore. This can - // happen when variable allocas are DCE'd. - if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) { - if (SS->getIntrinsicID() == Intrinsic::stacksave) { - BasicBlock::iterator BI = SS; - if (&*++BI == II) - return EraseInstFromFunction(CI); - } - } - - // Scan down this block to see if there is another stack restore in the - // same block without an intervening call/alloca. - BasicBlock::iterator BI = II; - TerminatorInst *TI = II->getParent()->getTerminator(); - bool CannotRemove = false; - for (++BI; &*BI != TI; ++BI) { - if (isa<AllocaInst>(BI) || isMalloc(BI)) { - CannotRemove = true; - break; - } - if (CallInst *BCI = dyn_cast<CallInst>(BI)) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) { - // If there is a stackrestore below this one, remove this one. - if (II->getIntrinsicID() == Intrinsic::stackrestore) - return EraseInstFromFunction(CI); - // Otherwise, ignore the intrinsic. - } else { - // If we found a non-intrinsic call, we can't remove the stack - // restore. 
-          CannotRemove = true;
-          break;
-        }
-      }
-    }
-
-    // If the stack restore is in a return/unwind block and if there are no
-    // allocas or calls between the restore and the return, nuke the restore.
-    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
-      return EraseInstFromFunction(CI);
-    break;
-  }
-  }
-
-  return visitCallSite(II);
-}
-
-// InvokeInst simplification
-//
-Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
-  return visitCallSite(&II);
-}
-
-/// isSafeToEliminateVarargsCast - If this cast does not affect the value
-/// passed through the varargs area, we can eliminate the use of the cast.
-static bool isSafeToEliminateVarargsCast(const CallSite CS,
-                                         const CastInst * const CI,
-                                         const TargetData * const TD,
-                                         const int ix) {
-  if (!CI->isLosslessCast())
-    return false;
-
-  // The size of ByVal arguments is derived from the type, so we
-  // can't change to a type with a different size. If the size were
-  // passed explicitly we could avoid this check.
-  if (!CS.paramHasAttr(ix, Attribute::ByVal))
-    return true;
-
-  const Type* SrcTy =
-            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
-  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
-  if (!SrcTy->isSized() || !DstTy->isSized())
-    return false;
-  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
-    return false;
-  return true;
-}
-
-// visitCallSite - Improvements for call and invoke instructions.
-//
-Instruction *InstCombiner::visitCallSite(CallSite CS) {
-  bool Changed = false;
-
-  // If the callee is a constexpr cast of a function, attempt to move the cast
-  // to the arguments of the call/invoke.
-  if (transformConstExprCastCall(CS)) return 0;
-
-  Value *Callee = CS.getCalledValue();
-
-  if (Function *CalleeF = dyn_cast<Function>(Callee))
-    if (CalleeF->getCallingConv() != CS.getCallingConv()) {
-      Instruction *OldCall = CS.getInstruction();
-      // If the call and callee calling conventions don't match, this call must
-      // be unreachable, as the call is undefined.
-      new StoreInst(ConstantInt::getTrue(*Context),
-                    UndefValue::get(Type::getInt1PtrTy(*Context)),
-                    OldCall);
-      // If OldCall does not return void then replaceAllUsesWith undef.
-      // This allows ValueHandlers and custom metadata to adjust themselves.
-      if (!OldCall->getType()->isVoidTy())
-        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
-      if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
-        return EraseInstFromFunction(*OldCall);
-      return 0;
-    }
-
-  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
-    // This instruction is not reachable, just remove it.  We insert a store to
-    // undef so that we know that this code is not reachable, despite the fact
-    // that we can't modify the CFG here.
-    new StoreInst(ConstantInt::getTrue(*Context),
-                  UndefValue::get(Type::getInt1PtrTy(*Context)),
-                  CS.getInstruction());
-
-    // If CS does not return void then replaceAllUsesWith undef.
-    // This allows ValueHandlers and custom metadata to adjust themselves.
-    if (!CS.getInstruction()->getType()->isVoidTy())
-      CS.getInstruction()->
-        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));
-
-    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
-      // Don't break the CFG, insert a dummy cond branch.
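Sketch of the replacement for an invented unreachable invoke (the store to
undef marks the spot unreachable without CFG surgery):

    invoke void %f() to label %ok unwind label %bad
  -->
    store i1 true, i1* undef
    br i1 true, label %ok, label %bad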
- BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), - ConstantInt::getTrue(*Context), II); - } - return EraseInstFromFunction(*CS.getInstruction()); - } - - if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee)) - if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0))) - if (In->getIntrinsicID() == Intrinsic::init_trampoline) - return transformCallThroughTrampoline(CS); - - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); - // See if we can optimize any arguments passed through the varargs area of - // the call. - for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), - E = CS.arg_end(); I != E; ++I, ++ix) { - CastInst *CI = dyn_cast<CastInst>(*I); - if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) { - *I = CI->getOperand(0); - Changed = true; - } - } - } - - if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) { - // Inline asm calls cannot throw - mark them 'nounwind'. - CS.setDoesNotThrow(); - Changed = true; - } - - return Changed ? CS.getInstruction() : 0; -} - -// transformConstExprCastCall - If the callee is a constexpr cast of a function, -// attempt to move the cast to the arguments of the call/invoke. -// -bool InstCombiner::transformConstExprCastCall(CallSite CS) { - if (!isa<ConstantExpr>(CS.getCalledValue())) return false; - ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue()); - if (CE->getOpcode() != Instruction::BitCast || - !isa<Function>(CE->getOperand(0))) - return false; - Function *Callee = cast<Function>(CE->getOperand(0)); - Instruction *Caller = CS.getInstruction(); - const AttrListPtr &CallerPAL = CS.getAttributes(); - - // Okay, this is a cast from a function to a different type. Unless doing so - // would cause a type conversion of one of our arguments, change this call to - // be a direct call with arguments casted to the appropriate types. - // - const FunctionType *FT = Callee->getFunctionType(); - const Type *OldRetTy = Caller->getType(); - const Type *NewRetTy = FT->getReturnType(); - - if (isa<StructType>(NewRetTy)) - return false; // TODO: Handle multiple return values. - - // Check to see if we are changing the return type... - if (OldRetTy != NewRetTy) { - if (Callee->isDeclaration() && - // Conversion is ok if changing from one pointer type to another or from - // a pointer to an integer of the same size. - !((isa<PointerType>(OldRetTy) || !TD || - OldRetTy == TD->getIntPtrType(Caller->getContext())) && - (isa<PointerType>(NewRetTy) || !TD || - NewRetTy == TD->getIntPtrType(Caller->getContext())))) - return false; // Cannot transform this return value. - - if (!Caller->use_empty() && - // void -> non-void is handled specially - !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) - return false; // Cannot transform this return value. - - if (!CallerPAL.isEmpty() && !Caller->use_empty()) { - Attributes RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs & Attribute::typeIncompatible(NewRetTy)) - return false; // Attribute not compatible with transformed value. - } - - // If the callsite is an invoke instruction, and the return value is used by - // a PHI node in a successor, we cannot change the return type of the call - // because there is no place to put the cast instruction (without breaking - // the critical edge). Bail out in this case. 
- if (!Caller->use_empty()) - if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) - for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); - UI != E; ++UI) - if (PHINode *PN = dyn_cast<PHINode>(*UI)) - if (PN->getParent() == II->getNormalDest() || - PN->getParent() == II->getUnwindDest()) - return false; - } - - unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); - unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs); - - CallSite::arg_iterator AI = CS.arg_begin(); - for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); - const Type *ActTy = (*AI)->getType(); - - if (!CastInst::isCastable(ActTy, ParamTy)) - return false; // Cannot transform this parameter value. - - if (CallerPAL.getParamAttributes(i + 1) - & Attribute::typeIncompatible(ParamTy)) - return false; // Attribute not compatible with transformed value. - - // Converting from one pointer type to another or between a pointer and an - // integer of the same size is safe even if we do not have a body. - bool isConvertible = ActTy == ParamTy || - (TD && ((isa<PointerType>(ParamTy) || - ParamTy == TD->getIntPtrType(Caller->getContext())) && - (isa<PointerType>(ActTy) || - ActTy == TD->getIntPtrType(Caller->getContext())))); - if (Callee->isDeclaration() && !isConvertible) return false; - } - - if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && - Callee->isDeclaration()) - return false; // Do not delete arguments unless we have a function body. - - if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && - !CallerPAL.isEmpty()) - // In this case we have more arguments than the new function type, but we - // won't be dropping them. Check that these extra arguments have attributes - // that are compatible with being a vararg call argument. - for (unsigned i = CallerPAL.getNumSlots(); i; --i) { - if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) - break; - Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs; - if (PAttrs & Attribute::VarArgsIncompatible) - return false; - } - - // Okay, we decided that this is a safe thing to do: go ahead and start - // inserting cast instructions as necessary... - std::vector<Value*> Args; - Args.reserve(NumActualArgs); - SmallVector<AttributeWithIndex, 8> attrVec; - attrVec.reserve(NumCommonArgs); - - // Get any return attributes. - Attributes RAttrs = CallerPAL.getRetAttributes(); - - // If the return value is not being used, the type may not be compatible - // with the existing attributes. Wipe out any problematic attributes. - RAttrs &= ~Attribute::typeIncompatible(NewRetTy); - - // Add the new return attributes. - if (RAttrs) - attrVec.push_back(AttributeWithIndex::get(0, RAttrs)); - - AI = CS.arg_begin(); - for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { - const Type *ParamTy = FT->getParamType(i); - if ((*AI)->getType() == ParamTy) { - Args.push_back(*AI); - } else { - Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, - false, ParamTy, false); - Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); - } - - // Add any parameter attributes. - if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) - attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); - } - - // If the function takes more arguments than the call was taking, add them - // now. - for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) - Args.push_back(Constant::getNullValue(FT->getParamType(i))); - - // If we are removing arguments to the function, emit an obnoxious warning. 
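Sketch of the overall transformation this function performs (hypothetical IR,
assuming 32-bit pointers so the pointer/intptr return conversion is permitted):

      %r = call i32 bitcast (i8* (i32)* @g to i32 (i32)*)(i32 %x)

becomes a direct call plus a cast of the result:

      %c = call i8* @g(i32 %x)
      %r = ptrtoint i8* %c to i32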
- if (FT->getNumParams() < NumActualArgs) {
- if (!FT->isVarArg()) {
- errs() << "WARNING: While resolving call to function '"
- << Callee->getName() << "' arguments were dropped!\n";
- } else {
- // Add all of the arguments in their promoted form to the arg list.
- for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
- const Type *PTy = getPromotedType((*AI)->getType());
- if (PTy != (*AI)->getType()) {
- // Must promote to pass through va_arg area!
- Instruction::CastOps opcode =
- CastInst::getCastOpcode(*AI, false, PTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));
- } else {
- Args.push_back(*AI);
- }
-
- // Add any parameter attributes.
- if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
- attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
- }
- }
- }
-
- if (Attributes FnAttrs = CallerPAL.getFnAttributes())
- attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
-
- if (NewRetTy->isVoidTy())
- Caller->setName(""); // Void type should not have a name.
-
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
- attrVec.end());
-
- Instruction *NC;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(),
- Caller->getName(), Caller);
- cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
- cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
- } else {
- NC = CallInst::Create(Callee, Args.begin(), Args.end(),
- Caller->getName(), Caller);
- CallInst *CI = cast<CallInst>(Caller);
- if (CI->isTailCall())
- cast<CallInst>(NC)->setTailCall();
- cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
- cast<CallInst>(NC)->setAttributes(NewCallerPAL);
- }
-
- // Insert a cast of the return type as necessary.
- Value *NV = NC;
- if (OldRetTy != NV->getType() && !Caller->use_empty()) {
- if (!NV->getType()->isVoidTy()) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
- OldRetTy, false);
- NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
-
- // If this is an invoke instruction, we should insert it after the first
- // non-phi instruction in the normal successor block.
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
- InsertNewInstBefore(NC, *I);
- } else {
- // Otherwise, it's a call; just insert the cast right after the call instr
- InsertNewInstBefore(NC, *Caller);
- }
- Worklist.AddUsersToWorkList(*Caller);
- } else {
- NV = UndefValue::get(Caller->getType());
- }
- }
-
-
- if (!Caller->use_empty())
- Caller->replaceAllUsesWith(NV);
-
- EraseInstFromFunction(*Caller);
- return true;
-}
-
-// transformCallThroughTrampoline - Turn a call to a function created by the
-// init_trampoline intrinsic into a direct call to the underlying function.
-//
-Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
- Value *Callee = CS.getCalledValue();
- const PointerType *PTy = cast<PointerType>(Callee->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- const AttrListPtr &Attrs = CS.getAttributes();
-
- // If the call already has the 'nest' attribute somewhere then give up -
- // otherwise 'nest' would occur twice after splicing in the chain.
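For orientation, a sketch of the trampoline fold (hypothetical IR in the 2.x
style; @f is an invented function whose first parameter carries the nest
attribute): a call through

      %tramp = call i8* @llvm.init.trampoline(i8* %mem,
                        i8* bitcast (i32 (i8*, i32)* @f to i8*), i8* %nval)
      %fp = bitcast i8* %tramp to i32 (i32)*
      %r = call i32 %fp(i32 %x)

is rewritten below into a direct call with the static chain spliced in:

      %r = call i32 @f(i8* nest %nval, i32 %x)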
- if (Attrs.hasAttrSomewhere(Attribute::Nest)) - return 0; - - IntrinsicInst *Tramp = - cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); - - Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts()); - const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); - const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); - - const AttrListPtr &NestAttrs = NestF->getAttributes(); - if (!NestAttrs.isEmpty()) { - unsigned NestIdx = 1; - const Type *NestTy = 0; - Attributes NestAttr = Attribute::None; - - // Look for a parameter marked with the 'nest' attribute. - for (FunctionType::param_iterator I = NestFTy->param_begin(), - E = NestFTy->param_end(); I != E; ++NestIdx, ++I) - if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) { - // Record the parameter type and any other attributes. - NestTy = *I; - NestAttr = NestAttrs.getParamAttributes(NestIdx); - break; - } - - if (NestTy) { - Instruction *Caller = CS.getInstruction(); - std::vector<Value*> NewArgs; - NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); - - SmallVector<AttributeWithIndex, 8> NewAttrs; - NewAttrs.reserve(Attrs.getNumSlots() + 1); - - // Insert the nest argument into the call argument list, which may - // mean appending it. Likewise for attributes. - - // Add any result attributes. - if (Attributes Attr = Attrs.getRetAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(0, Attr)); - - { - unsigned Idx = 1; - CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - do { - if (Idx == NestIdx) { - // Add the chain argument and attributes. - Value *NestVal = Tramp->getOperand(3); - if (NestVal->getType() != NestTy) - NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller); - NewArgs.push_back(NestVal); - NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); - } - - if (I == E) - break; - - // Add the original argument and attributes. - NewArgs.push_back(*I); - if (Attributes Attr = Attrs.getParamAttributes(Idx)) - NewAttrs.push_back - (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); - - ++Idx, ++I; - } while (1); - } - - // Add any function attributes. - if (Attributes Attr = Attrs.getFnAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); - - // The trampoline may have been bitcast to a bogus type (FTy). - // Handle this by synthesizing a new function type, equal to FTy - // with the chain parameter inserted. - - std::vector<const Type*> NewTypes; - NewTypes.reserve(FTy->getNumParams()+1); - - // Insert the chain's type into the list of parameter types, which may - // mean appending it. - { - unsigned Idx = 1; - FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); - - do { - if (Idx == NestIdx) - // Add the chain's type. - NewTypes.push_back(NestTy); - - if (I == E) - break; - - // Add the original type. - NewTypes.push_back(*I); - - ++Idx, ++I; - } while (1); - } - - // Replace the trampoline call with a direct call. Let the generic - // code sort out any function type mismatches. - FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, - FTy->isVarArg()); - Constant *NewCallee = - NestF->getType() == PointerType::getUnqual(NewFTy) ? 
- NestF : ConstantExpr::getBitCast(NestF, - PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), - NewAttrs.end()); - - Instruction *NewCaller; - if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { - NewCaller = InvokeInst::Create(NewCallee, - II->getNormalDest(), II->getUnwindDest(), - NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); - cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); - } else { - NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(), - Caller->getName(), Caller); - if (cast<CallInst>(Caller)->isTailCall()) - cast<CallInst>(NewCaller)->setTailCall(); - cast<CallInst>(NewCaller)-> - setCallingConv(cast<CallInst>(Caller)->getCallingConv()); - cast<CallInst>(NewCaller)->setAttributes(NewPAL); - } - if (!Caller->getType()->isVoidTy()) - Caller->replaceAllUsesWith(NewCaller); - Caller->eraseFromParent(); - Worklist.Remove(Caller); - return 0; - } - } - - // Replace the trampoline call with a direct call. Since there is no 'nest' - // parameter, there is no need to adjust the argument list. Let the generic - // code sort out any function type mismatches. - Constant *NewCallee = - NestF->getType() == PTy ? NestF : - ConstantExpr::getBitCast(NestF, PTy); - CS.setCalledFunction(NewCallee); - return CS.getInstruction(); -} - -/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] -/// and if a/b/c and the add's all have a single use, turn this into a phi -/// and a single binop. -Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { - Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); - assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)); - unsigned Opc = FirstInst->getOpcode(); - Value *LHSVal = FirstInst->getOperand(0); - Value *RHSVal = FirstInst->getOperand(1); - - const Type *LHSType = LHSVal->getType(); - const Type *RHSType = RHSVal->getType(); - - // Scan to see if all operands are the same opcode, and all have one use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); - if (!I || I->getOpcode() != Opc || !I->hasOneUse() || - // Verify type of the LHS matches so we don't fold cmp's of different - // types or GEP's with different index types. - I->getOperand(0)->getType() != LHSType || - I->getOperand(1)->getType() != RHSType) - return 0; - - // If they are CmpInst instructions, check their predicates - if (Opc == Instruction::ICmp || Opc == Instruction::FCmp) - if (cast<CmpInst>(I)->getPredicate() != - cast<CmpInst>(FirstInst)->getPredicate()) - return 0; - - // Keep track of which operand needs a phi node. - if (I->getOperand(0) != LHSVal) LHSVal = 0; - if (I->getOperand(1) != RHSVal) RHSVal = 0; - } - - // If both LHS and RHS would need a PHI, don't do this transformation, - // because it would increase the number of PHIs entering the block, - // which leads to higher register pressure. This is especially - // bad when the PHIs are in the header of a loop. - if (!LHSVal && !RHSVal) - return 0; - - // Otherwise, this is safe to transform! 
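Concretely (hypothetical IR): with %x shared on the RHS, only the LHS needs a
phi, so

      %s1 = add i32 %a, %x                       ; in %t
      %s2 = add i32 %b, %x                       ; in %f
      %r = phi i32 [ %s1, %t ], [ %s2, %f ]

becomes a single phi and a single add:

      %a.pn = phi i32 [ %a, %t ], [ %b, %f ]
      %r = add i32 %a.pn, %x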
- - Value *InLHS = FirstInst->getOperand(0); - Value *InRHS = FirstInst->getOperand(1); - PHINode *NewLHS = 0, *NewRHS = 0; - if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType, - FirstInst->getOperand(0)->getName() + ".pn"); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); - NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewLHS, PN); - LHSVal = NewLHS; - } - - if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType, - FirstInst->getOperand(1)->getName() + ".pn"); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); - NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); - InsertNewInstBefore(NewRHS, PN); - RHSVal = NewRHS; - } - - // Add all operands to the new PHIs. - if (NewLHS || NewRHS) { - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i)); - if (NewLHS) { - Value *NewInLHS = InInst->getOperand(0); - NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i)); - } - if (NewRHS) { - Value *NewInRHS = InInst->getOperand(1); - NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i)); - } - } - } - - if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) - return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); - CmpInst *CIOp = cast<CmpInst>(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), - LHSVal, RHSVal); -} - -Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { - GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); - - SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), - FirstInst->op_end()); - // This is true if all GEP bases are allocas and if all indices into them are - // constants. - bool AllBasePointersAreAllocas = true; - - // We don't want to replace this phi if the replacement would require - // more than one phi, which leads to higher register pressure. This is - // especially bad when the PHIs are in the header of a loop. - bool NeededPhi = false; - - // Scan to see if all operands are the same opcode, and all have one use. - for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { - GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); - if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || - GEP->getNumOperands() != FirstInst->getNumOperands()) - return 0; - - // Keep track of whether or not all GEPs are of alloca pointers. - if (AllBasePointersAreAllocas && - (!isa<AllocaInst>(GEP->getOperand(0)) || - !GEP->hasAllConstantIndices())) - AllBasePointersAreAllocas = false; - - // Compare the operand lists. - for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { - if (FirstInst->getOperand(op) == GEP->getOperand(op)) - continue; - - // Don't merge two GEPs when two operands differ (introducing phi nodes) - // if one of the PHIs has a constant for the index. The index may be - // substantially cheaper to compute for the constants, so making it a - // variable index could pessimize the path. This also handles the case - // for struct indices, which must always be constant. - if (isa<ConstantInt>(FirstInst->getOperand(op)) || - isa<ConstantInt>(GEP->getOperand(op))) - return 0; - - if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) - return 0; - - // If we already needed a PHI for an earlier operand, and another operand - // also requires a PHI, we'd be introducing more PHIs than we're - // eliminating, which increases register pressure on entry to the PHI's - // block. 
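A sketch of the GEP case (hypothetical IR; the indices are variable, so the
constant-index bailout above does not apply):

      %p1 = getelementptr [4 x i32]* %buf, i32 0, i32 %i    ; in %t
      %p2 = getelementptr [4 x i32]* %buf, i32 0, i32 %j    ; in %f
      %p = phi i32* [ %p1, %t ], [ %p2, %f ]

becomes one phi and one gep:

      %i.pn = phi i32 [ %i, %t ], [ %j, %f ]
      %p = getelementptr [4 x i32]* %buf, i32 0, i32 %i.pn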
- if (NeededPhi)
- return 0;
-
- FixedOperands[op] = 0; // Needs a PHI.
- NeededPhi = true;
- }
- }
-
- // If all of the base pointers of the PHI'd GEPs are from allocas, don't
- // bother doing this transformation. At best, this will just save a bit of
- // offset calculation, but all the predecessors will have to materialize the
- // stack address into a register anyway. We'd actually rather *clone* the
- // load up into the predecessors so that we have a load of a gep of an alloca,
- // which can usually all be folded into the load.
- if (AllBasePointersAreAllocas)
- return 0;
-
- // Otherwise, this is safe to transform. Insert PHI nodes for each operand
- // that is variable.
- SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-
- bool HasAnyPHIs = false;
- for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
- if (FixedOperands[i]) continue; // operand doesn't need a phi.
- Value *FirstOp = FirstInst->getOperand(i);
- PHINode *NewPN = PHINode::Create(FirstOp->getType(),
- FirstOp->getName()+".pn");
- InsertNewInstBefore(NewPN, PN);
-
- NewPN->reserveOperandSpace(e);
- NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
- OperandPhis[i] = NewPN;
- FixedOperands[i] = NewPN;
- HasAnyPHIs = true;
- }
-
-
- // Add all operands to the new PHIs.
- if (HasAnyPHIs) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
- BasicBlock *InBB = PN.getIncomingBlock(i);
-
- for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
- if (PHINode *OpPhi = OperandPhis[op])
- OpPhi->addIncoming(InGEP->getOperand(op), InBB);
- }
- }
-
- Value *Base = FixedOperands[0];
- return cast<GEPOperator>(FirstInst)->isInBounds() ?
- GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
- FixedOperands.end()) :
- GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
- FixedOperands.end());
-}
-
-
-/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
-/// sink the load out of the block that defines it. This means that it must be
-/// obvious the value of the load is not changed from the point of the load to
-/// the end of the block it is in.
-///
-/// Finally, it is safe, but not profitable, to sink a load targeting a
-/// non-address-taken alloca. Doing so will cause us to not promote the alloca
-/// to a register.
-static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
- BasicBlock::iterator BBI = L, E = L->getParent()->end();
-
- for (++BBI; BBI != E; ++BBI)
- if (BBI->mayWriteToMemory())
- return false;
-
- // Check for a non-address-taken alloca. If it is not already address-taken,
- // it isn't profitable to do this xform.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
- bool isAddressTaken = false;
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI) {
- if (isa<LoadInst>(UI)) continue;
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
- // If storing TO the alloca, then the address isn't taken.
- if (SI->getOperand(1) == AI) continue;
- }
- isAddressTaken = true;
- break;
- }
-
- if (!isAddressTaken && AI->isStaticAlloca())
- return false;
- }
-
- // If this load is a load from a GEP with a constant offset from an alloca,
- // then we don't want to sink it. In its present form, it will be
- // load [constant stack offset]. Sinking it will cause us to have to
- // materialize the stack addresses in each predecessor in a register only to
- // do a shared load from register in the successor.
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(L->getOperand(0))) - if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) - if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) - return false; - - return true; -} - -Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { - LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0)); - - // When processing loads, we need to propagate two bits of information to the - // sunk load: whether it is volatile, and what its alignment is. We currently - // don't sink loads when some have their alignment specified and some don't. - // visitLoadInst will propagate an alignment onto the load when TD is around, - // and if TD isn't around, we can't handle the mixed case. - bool isVolatile = FirstLI->isVolatile(); - unsigned LoadAlignment = FirstLI->getAlignment(); - - // We can't sink the load if the loaded value could be modified between the - // load and the PHI. - if (FirstLI->getParent() != PN.getIncomingBlock(0) || - !isSafeAndProfitableToSinkLoad(FirstLI)) - return 0; - - // If the PHI is of volatile loads and the load block has multiple - // successors, sinking it would remove a load of the volatile value from - // the path through the other successor. - if (isVolatile && - FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; - - // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); - if (!LI || !LI->hasOneUse()) - return 0; - - // We can't sink the load if the loaded value could be modified between - // the load and the PHI. - if (LI->isVolatile() != isVolatile || - LI->getParent() != PN.getIncomingBlock(i) || - !isSafeAndProfitableToSinkLoad(LI)) - return 0; - - // If some of the loads have an alignment specified but not all of them, - // we can't do the transformation. - if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) - return 0; - - LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); - - // If the PHI is of volatile loads and the load block has multiple - // successors, sinking it would remove a load of the volatile value from - // the path through the other successor. - if (isVolatile && - LI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; - } - - // Okay, they are all the same operation. Create a new PHI node of the - // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), - PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); - - Value *InVal = FirstLI->getOperand(0); - NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - - // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); - if (NewInVal != InVal) - InVal = 0; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); - } - - Value *PhiVal; - if (InVal) { - // The new PHI unions all of the same values together. This is really - // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; - delete NewPN; - } else { - InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; - } - - // If this was a volatile load that we are merging, make sure to loop through - // and mark all the input loads as non-volatile. If we don't do this, we will - // insert a new volatile load and the old ones will not be deletable. 
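Illustration (hypothetical IR, not lines from this patch): subject to the
safety and profitability checks above, FoldPHIArgLoadIntoPHI rewrites

      %v1 = load i32* %p                          ; in %pred1
      %v2 = load i32* %q                          ; in %pred2
      %r = phi i32 [ %v1, %pred1 ], [ %v2, %pred2 ]

into a single load of a phi'd address:

      %r.in = phi i32* [ %p, %pred1 ], [ %q, %pred2 ]
      %r = load i32* %r.in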
- if (isVolatile) - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) - cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); - - return new LoadInst(PhiVal, "", isVolatile, LoadAlignment); -} - - - -/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" -/// operator and they all are only used by the PHI, PHI together their -/// inputs, and do the operation once, to the result of the PHI. -Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { - Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); - - if (isa<GetElementPtrInst>(FirstInst)) - return FoldPHIArgGEPIntoPHI(PN); - if (isa<LoadInst>(FirstInst)) - return FoldPHIArgLoadIntoPHI(PN); - - // Scan the instruction, looking for input operations that can be folded away. - // If all input operands to the phi are the same instruction (e.g. a cast from - // the same type or "+42") we can pull the operation through the PHI, reducing - // code size and simplifying code. - Constant *ConstantOp = 0; - const Type *CastSrcTy = 0; - - if (isa<CastInst>(FirstInst)) { - CastSrcTy = FirstInst->getOperand(0)->getType(); - - // Be careful about transforming integer PHIs. We don't want to pessimize - // the code by turning an i32 into an i1293. - if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { - if (!ShouldChangeType(PN.getType(), CastSrcTy, TD)) - return 0; - } - } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { - // Can fold binop, compare or shift here if the RHS is a constant, - // otherwise call FoldPHIArgBinOpIntoPHI. - ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); - if (ConstantOp == 0) - return FoldPHIArgBinOpIntoPHI(PN); - } else { - return 0; // Cannot fold this operation. - } - - // Check to see if all arguments are the same operation. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); - if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) - return 0; - if (CastSrcTy) { - if (I->getOperand(0)->getType() != CastSrcTy) - return 0; // Cast operation must match. - } else if (I->getOperand(1) != ConstantOp) { - return 0; - } - } - - // Okay, they are all the same operation. Create a new PHI node of the - // correct type, and PHI together all of the LHS's of the instructions. - PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), - PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); - - Value *InVal = FirstInst->getOperand(0); - NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - - // Add all operands to the new PHI. - for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); - if (NewInVal != InVal) - InVal = 0; - NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); - } - - Value *PhiVal; - if (InVal) { - // The new PHI unions all of the same values together. This is really - // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; - delete NewPN; - } else { - InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; - } - - // Insert and return the new operation. 
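For example (hypothetical IR), a phi of truncs

      %a8 = trunc i32 %a to i8                    ; in %t
      %b8 = trunc i32 %b to i8                    ; in %f
      %r = phi i8 [ %a8, %t ], [ %b8, %f ]

becomes a phi of the inputs plus one trunc:

      %r.in = phi i32 [ %a, %t ], [ %b, %f ]
      %r = trunc i32 %r.in to i8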
- if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
- return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
-
- if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
-
- CmpInst *CIOp = cast<CmpInst>(FirstInst);
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
- PhiVal, ConstantOp);
-}
-
-/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
-/// that is dead.
-static bool DeadPHICycle(PHINode *PN,
- SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
- if (PN->use_empty()) return true;
- if (!PN->hasOneUse()) return false;
-
- // Remember this node, and if we find the cycle, return.
- if (!PotentiallyDeadPHIs.insert(PN))
- return true;
-
- // Don't scan crazily complex things.
- if (PotentiallyDeadPHIs.size() == 16)
- return false;
-
- if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
- return DeadPHICycle(PU, PotentiallyDeadPHIs);
-
- return false;
-}
-
-/// PHIsEqualValue - Return true if this phi node is always equal to
-/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
-/// z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
- SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
- // See if we already saw this PHI node.
- if (!ValueEqualPHIs.insert(PN))
- return true;
-
- // Don't scan crazily complex things.
- if (ValueEqualPHIs.size() == 16)
- return false;
-
- // Scan the operands to see if they are either phi nodes or are equal to
- // the value.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Op = PN->getIncomingValue(i);
- if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
- if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
- return false;
- } else if (Op != NonPhiInVal)
- return false;
- }
-
- return true;
-}
-
-
-namespace {
-struct PHIUsageRecord {
- unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
- unsigned Shift; // The amount shifted.
- Instruction *Inst; // The trunc instruction.
-
- PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
- : PHIId(pn), Shift(Sh), Inst(User) {}
-
- bool operator<(const PHIUsageRecord &RHS) const {
- if (PHIId < RHS.PHIId) return true;
- if (PHIId > RHS.PHIId) return false;
- if (Shift < RHS.Shift) return true;
- if (Shift > RHS.Shift) return false;
- return Inst->getType()->getPrimitiveSizeInBits() <
- RHS.Inst->getType()->getPrimitiveSizeInBits();
- }
-};
-
-struct LoweredPHIRecord {
- PHINode *PN; // The PHI that was lowered.
- unsigned Shift; // The amount shifted.
- unsigned Width; // The width extracted.
-
- LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
- : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-
- // Ctor form used by DenseMap.
- LoweredPHIRecord(PHINode *pn, unsigned Sh)
- : PN(pn), Shift(Sh), Width(0) {}
-};
-}
-
-namespace llvm {
- template<>
- struct DenseMapInfo<LoweredPHIRecord> {
- static inline LoweredPHIRecord getEmptyKey() {
- return LoweredPHIRecord(0, 0);
- }
- static inline LoweredPHIRecord getTombstoneKey() {
- return LoweredPHIRecord(0, 1);
- }
- static unsigned getHashValue(const LoweredPHIRecord &Val) {
- return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
- (Val.Width>>3);
- }
- static bool isEqual(const LoweredPHIRecord &LHS,
- const LoweredPHIRecord &RHS) {
- return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
- LHS.Width == RHS.Width;
- }
- };
- template <>
- struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
-}
-
-
-/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
-/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
-/// so, we split the PHI into the various pieces being extracted. This sort of
-/// thing is introduced when SROA promotes an aggregate to large integer values.
-///
-/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
-/// inttoptr. We should produce new PHIs in the right type.
-///
-Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
- // PHIUsers - Keep track of all of the truncated values extracted from a set
- // of PHIs, along with their offset. These are the things we want to rewrite.
- SmallVector<PHIUsageRecord, 16> PHIUsers;
-
- // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
- // nodes which are extracted from. PHIsInspected is a set we use to avoid
- // revisiting PHIs; PHIsToSlice is an ordered list of PHIs that we need to
- // check the uses of (to ensure they are all extracts).
- SmallVector<PHINode*, 8> PHIsToSlice;
- SmallPtrSet<PHINode*, 8> PHIsInspected;
-
- PHIsToSlice.push_back(&FirstPhi);
- PHIsInspected.insert(&FirstPhi);
-
- for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
- PHINode *PN = PHIsToSlice[PHIId];
-
- // Scan the input list of the PHI. If any input is an invoke, and if the
- // input is defined in the predecessor, then we won't be able to split the
- // critical edge which is required to insert a truncate. Because of this,
- // we have to bail out.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
- if (II == 0) continue;
- if (II->getParent() != PN->getIncomingBlock(i))
- continue;
-
- // The invoke is defined directly in the predecessor, so we have a
- // critical edge where we would need to put the truncate. Since we can't
- // split the edge in instcombine, we have to bail out.
- return 0;
- }
-
-
- for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- // If the user is a PHI, inspect its uses recursively.
- if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (PHIsInspected.insert(UserPN))
- PHIsToSlice.push_back(UserPN);
- continue;
- }
-
- // Truncates are always ok.
- if (isa<TruncInst>(User)) {
- PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
- continue;
- }
-
- // Otherwise it must be a lshr which can only be used by one trunc.
- if (User->getOpcode() != Instruction::LShr ||
- !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
- !isa<ConstantInt>(User->getOperand(1)))
- return 0;
-
- unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
- PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
- }
- }
-
- // If we have no users, they must all be self uses; just nuke the PHI.
- if (PHIUsers.empty())
- return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-
- // If this phi node is transformable, create new PHIs for all the pieces
- // extracted out of it. First, sort the users by their offset and size.
- array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-
- DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
- );
-
- // PredValues - This is a temporary used when rewriting PHI nodes. It is
- // hoisted out here to avoid construction/destruction thrashing.
- DenseMap<BasicBlock*, Value*> PredValues;
-
- // ExtractedVals - Each new PHI we introduce is saved here so we don't
- // introduce redundant PHIs.
- DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-
- for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
- unsigned PHIId = PHIUsers[UserI].PHIId;
- PHINode *PN = PHIsToSlice[PHIId];
- unsigned Offset = PHIUsers[UserI].Shift;
- const Type *Ty = PHIUsers[UserI].Inst->getType();
-
- PHINode *EltPHI;
-
- // If we've already lowered a user like this, reuse the previously lowered
- // value.
- if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-
- // Otherwise, create the new PHI node for this user.
- EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
- assert(EltPHI->getType() != PN->getType() &&
- "Truncate didn't shrink phi?");
-
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = PN->getIncomingBlock(i);
- Value *&PredVal = PredValues[Pred];
-
- // If we already have a value for this predecessor, reuse it.
- if (PredVal) {
- EltPHI->addIncoming(PredVal, Pred);
- continue;
- }
-
- // Handle the PHI self-reuse case.
- Value *InVal = PN->getIncomingValue(i);
- if (InVal == PN) {
- PredVal = EltPHI;
- EltPHI->addIncoming(PredVal, Pred);
- continue;
- }
-
- if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
- // If the incoming value was a PHI, and if it was one of the PHIs we
- // already rewrote, just use the lowered value.
- if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
- PredVal = Res;
- EltPHI->addIncoming(PredVal, Pred);
- continue;
- }
- }
-
- // Otherwise, do an extract in the predecessor.
- Builder->SetInsertPoint(Pred, Pred->getTerminator());
- Value *Res = InVal;
- if (Offset)
- Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
- Offset), "extract");
- Res = Builder->CreateTrunc(Res, Ty, "extract.t");
- PredVal = Res;
- EltPHI->addIncoming(Res, Pred);
-
- // If the incoming value was a PHI, and if it was one of the PHIs we are
- // rewriting, we will ultimately delete the code we inserted. This
- // means we need to revisit that PHI to make sure we extract out the
- // needed piece.
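A sketch of the end result (hypothetical IR, 32-bit target where i64 is
illegal; %x.lo, %x.hi, %y.lo, %y.hi are invented names for the lshr/trunc
extracts rematerialized in each predecessor):

      %big = phi i64 [ %x, %a ], [ %y, %b ]
      %lo = trunc i64 %big to i32
      %sh = lshr i64 %big, 32
      %hi = trunc i64 %sh to i32

is sliced into two legal phis:

      %big.off0 = phi i32 [ %x.lo, %a ], [ %y.lo, %b ]
      %big.off32 = phi i32 [ %x.hi, %a ], [ %y.hi, %b ]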
- if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
- if (PHIsInspected.count(OldInVal)) {
- unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
- OldInVal)-PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
- cast<Instruction>(Res)));
- ++UserE;
- }
- }
- PredValues.clear();
-
- DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
- << *EltPHI << '\n');
- ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
- }
-
- // Replace the use of this piece with the PHI node.
- ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
- }
-
- // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
- // with undefs.
- Value *Undef = UndefValue::get(FirstPhi.getType());
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
- return ReplaceInstUsesWith(FirstPhi, Undef);
-}
-
-// PHINode simplification
-//
-Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- // If LCSSA is around, don't mess with Phi nodes
- if (MustPreserveLCSSA) return 0;
-
- if (Value *V = PN.hasConstantValue())
- return ReplaceInstUsesWith(PN, V);
-
- // If all PHI operands are the same operation, pull them through the PHI,
- // reducing code size.
- if (isa<Instruction>(PN.getIncomingValue(0)) &&
- isa<Instruction>(PN.getIncomingValue(1)) &&
- cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
- cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
- // FIXME: The hasOneUse check will fail for PHIs whose incoming values
- // are used more than once.
- PN.getIncomingValue(0)->hasOneUse())
- if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
- return Result;
-
- // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
- // this PHI only has a single use (a PHI), and if that PHI only has one use (a
- // PHI)... break the cycle.
- if (PN.hasOneUse()) {
- Instruction *PHIUser = cast<Instruction>(PN.use_back());
- if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
- SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
- PotentiallyDeadPHIs.insert(&PN);
- if (DeadPHICycle(PU, PotentiallyDeadPHIs))
- return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
- }
-
- // If this phi has a single use, and if that use just computes a value for
- // the next iteration of a loop, delete the phi. This occurs with unused
- // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
- // common case here is good because the only other things that catch this
- // are induction variable analysis (sometimes) and ADCE, which is only run
- // late.
- if (PHIUser->hasOneUse() &&
- (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
- PHIUser->use_back() == &PN) {
- return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
- }
- }
-
- // We sometimes end up with phi cycles that non-obviously end up being the
- // same value, for example:
- // z = some value; x = phi (y, z); y = phi (x, z)
- // where the phi nodes don't necessarily need to be in the same block. Do a
- // quick check to see if the PHI node only contains a single non-phi value;
- // if so, scan to see if the phi cycle is actually equal to that value.
- {
- unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
- // Scan for the first non-phi operand.
- while (InValNo != NumOperandVals &&
- isa<PHINode>(PN.getIncomingValue(InValNo)))
- ++InValNo;
-
- if (InValNo != NumOperandVals) {
- Value *NonPhiInVal = PN.getOperand(InValNo);
-
- // Scan the rest of the operands to see if there are any conflicts; if so,
- // there is no need to recursively scan other phis.
- for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
- Value *OpVal = PN.getIncomingValue(InValNo);
- if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
- break;
- }
-
- // If we scanned over all operands, then we have one unique value plus
- // phi values. Scan PHI nodes to see if they all merge in each other or
- // the value.
- if (InValNo == NumOperandVals) {
- SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
- if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
- return ReplaceInstUsesWith(PN, NonPhiInVal);
- }
- }
- }
-
- // If there are multiple PHIs, sort their operands so that they all list
- // the blocks in the same order. This will help identical PHIs be eliminated
- // by other passes. Other passes shouldn't depend on this for correctness
- // however.
- PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
- if (&PN != FirstPN)
- for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *BBA = PN.getIncomingBlock(i);
- BasicBlock *BBB = FirstPN->getIncomingBlock(i);
- if (BBA != BBB) {
- Value *VA = PN.getIncomingValue(i);
- unsigned j = PN.getBasicBlockIndex(BBB);
- Value *VB = PN.getIncomingValue(j);
- PN.setIncomingBlock(i, BBB);
- PN.setIncomingValue(i, VB);
- PN.setIncomingBlock(j, BBA);
- PN.setIncomingValue(j, VA);
- // NOTE: Instcombine normally would want us to "return &PN" if we
- // modified any of the operands of an instruction. However, since we
- // aren't adding or removing uses (just rearranging them) we don't do
- // this in this case.
- }
- }
-
- // If this is an integer PHI and we know that it has an illegal type, see if
- // it is only used by trunc or trunc(lshr) operations. If so, we split the
- // PHI into the various pieces being extracted. This sort of thing is
- // introduced when SROA promotes an aggregate to a single large integer type.
- if (isa<IntegerType>(PN.getType()) && TD &&
- !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
- if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
- return Res;
-
- return 0;
-}
-
-Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
-
- if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD))
- return ReplaceInstUsesWith(GEP, V);
-
- Value *PtrOp = GEP.getOperand(0);
-
- if (isa<UndefValue>(GEP.getOperand(0)))
- return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
-
- // Eliminate unneeded casts for indices.
- if (TD) {
- bool MadeChange = false;
- unsigned PtrSize = TD->getPointerSizeInBits();
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
- I != E; ++I, ++GTI) {
- if (!isa<SequentialType>(*GTI)) continue;
-
- // If we are using a wider index than needed for this platform, shrink it
- // to what we need. If narrower, sign-extend it to what we need. This
- // explicit cast can make subsequent optimizations more obvious.
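For instance (hypothetical IR, names invented, 32-bit intptr_t): a narrow
index in

      %p = getelementptr i32* %base, i16 %idx

is canonicalized by sign-extending the index to the pointer width:

      %idx.c = sext i16 %idx to i32
      %p = getelementptr i32* %base, i32 %idx.c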
- unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
- if (OpBits == PtrSize)
- continue;
-
- *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
- MadeChange = true;
- }
- if (MadeChange) return &GEP;
- }
-
- // Combine Indices - If the source pointer to this getelementptr instruction
- // is a getelementptr instruction, combine the indices of the two
- // getelementptr instructions into a single instruction.
- //
- if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
- // Note that if our source is a gep chain itself, we wait for that
- // chain to be resolved before we perform this transformation. This
- // avoids us creating a TON of code in some cases.
- //
- if (GetElementPtrInst *SrcGEP =
- dyn_cast<GetElementPtrInst>(Src->getOperand(0)))
- if (SrcGEP->getNumOperands() == 2)
- return 0; // Wait until our source is folded to completion.
-
- SmallVector<Value*, 8> Indices;
-
- // Find out whether the last index in the source GEP is a sequential idx.
- bool EndsWithSequential = false;
- for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
- I != E; ++I)
- EndsWithSequential = !isa<StructType>(*I);
-
- // Can we combine the two pointer arithmetic offsets?
- if (EndsWithSequential) {
- // Replace: gep (gep %P, long B), long A, ...
- // With: T = long A+B; gep %P, T, ...
- //
- Value *Sum;
- Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
- Value *GO1 = GEP.getOperand(1);
- if (SO1 == Constant::getNullValue(SO1->getType())) {
- Sum = GO1;
- } else if (GO1 == Constant::getNullValue(GO1->getType())) {
- Sum = SO1;
- } else {
- // If they aren't the same type, then the input hasn't been processed
- // by the loop above yet (which canonicalizes sequential index types to
- // intptr_t). Just avoid transforming this until the input has been
- // normalized.
- if (SO1->getType() != GO1->getType())
- return 0;
- Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
- }
-
- // Update the GEP in place if possible.
- if (Src->getNumOperands() == 2) {
- GEP.setOperand(0, Src->getOperand(0));
- GEP.setOperand(1, Sum);
- return &GEP;
- }
- Indices.append(Src->op_begin()+1, Src->op_end()-1);
- Indices.push_back(Sum);
- Indices.append(GEP.op_begin()+2, GEP.op_end());
- } else if (isa<Constant>(*GEP.idx_begin()) &&
- cast<Constant>(*GEP.idx_begin())->isNullValue() &&
- Src->getNumOperands() != 1) {
- // Otherwise we can do the fold if the first index of the GEP is a zero
- Indices.append(Src->op_begin()+1, Src->op_end());
- Indices.append(GEP.idx_begin()+1, GEP.idx_end());
- }
-
- if (!Indices.empty())
- return (cast<GEPOperator>(&GEP)->isInBounds() &&
- Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(),
- Indices.end(), GEP.getName());
- }
-
- // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
- if (Value *X = getBitCastOperand(PtrOp)) {
- assert(isa<PointerType>(X->getType()) && "Must be cast from pointer");
-
- // If the input bitcast is actually "bitcast(bitcast(x))", then we don't
- // want to change the gep until the bitcasts are eliminated.
- if (getBitCastOperand(X)) {
- Worklist.AddValue(PtrOp);
- return 0;
- }
-
- bool HasZeroPointerIndex = false;
- if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
- HasZeroPointerIndex = C->isZero();
-
- // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
- // into : GEP [10 x i8]* X, i32 0, ...
- // - // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... - // into : GEP i8* X, ... - // - // This occurs when the program declares an array extern like "int X[];" - if (HasZeroPointerIndex) { - const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); - const PointerType *XTy = cast<PointerType>(X->getType()); - if (const ArrayType *CATy = - dyn_cast<ArrayType>(CPTy->getElementType())) { - // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? - if (CATy->getElementType() == XTy->getElementType()) { - // -> GEP i8* X, ... - SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end()); - return cast<GEPOperator>(&GEP)->isInBounds() ? - GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(), - GEP.getName()) : - GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), - GEP.getName()); - } - - if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){ - // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? - if (CATy->getElementType() == XATy->getElementType()) { - // -> GEP [10 x i8]* X, i32 0, ... - // At this point, we know that the cast source type is a pointer - // to an array of the same type as the destination pointer - // array. Because the array type is never stepped over (there - // is a leading zero) we can fold the cast into this GEP. - GEP.setOperand(0, X); - return &GEP; - } - } - } - } else if (GEP.getNumOperands() == 2) { - // Transform things like: - // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V - // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast - const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType(); - const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); - if (TD && isa<ArrayType>(SrcElTy) && - TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == - TD->getTypeAllocSize(ResElTy)) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); - Idx[1] = GEP.getOperand(1); - Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : - Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); - // V and GEP are both pointer types --> BitCast - return new BitCastInst(NewGEP, GEP.getType()); - } - - // Transform things like: - // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp - // (where tmp = 8*tmp2) into: - // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - - if (TD && isa<ArrayType>(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) { - uint64_t ArrayEltSize = - TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); - - // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We - // allow either a mul, shift, or constant here. 
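Continuing the comment's own example (hypothetical IR): with tmp = 8*tmp2
expressed as a shift,

      %tmp = shl i32 %tmp2, 3
      %p = getelementptr i8* bitcast ([100 x double]* @A to i8*), i32 %tmp

the scale matches the 8-byte element size, so this can become

      %p1 = getelementptr [100 x double]* @A, i32 0, i32 %tmp2
      %p = bitcast double* %p1 to i8*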
- Value *NewIdx = 0;
- ConstantInt *Scale = 0;
- if (ArrayEltSize == 1) {
- NewIdx = GEP.getOperand(1);
- Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
- } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
- NewIdx = ConstantInt::get(CI->getType(), 1);
- Scale = CI;
- } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){
- if (Inst->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
- uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
- 1ULL << ShAmtVal);
- NewIdx = Inst->getOperand(0);
- } else if (Inst->getOpcode() == Instruction::Mul &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- Scale = cast<ConstantInt>(Inst->getOperand(1));
- NewIdx = Inst->getOperand(0);
- }
- }
-
- // If the index will be to exactly the right offset with the scale taken
- // out, perform the transformation. Note, we don't know whether Scale is
- // signed or not. We'll use the unsigned version of the division/modulo
- // operation after making sure Scale doesn't have the sign bit set.
- if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&
- Scale->getZExtValue() % ArrayEltSize == 0) {
- Scale = ConstantInt::get(Scale->getType(),
- Scale->getZExtValue() / ArrayEltSize);
- if (Scale->getZExtValue() != 1) {
- Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
- false /*ZExt*/);
- NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");
- }
-
- // Insert the new GEP instruction.
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- Idx[1] = NewIdx;
- Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) :
- Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());
- // The NewGEP must be pointer typed, so must the old one -> BitCast
- return new BitCastInst(NewGEP, GEP.getType());
- }
- }
- }
- }
-
- /// See if we can simplify:
- /// X = bitcast A* to B*
- /// Y = gep X, <...constant indices...>
- /// into a gep of the original struct. This is important for SROA and alias
- /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
- if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
- // Determine how much the GEP moves the pointer. We are guaranteed to get
- // a constant back from EmitGEPOffset.
- ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, *this));
- int64_t Offset = OffsetV->getSExtValue();
-
- // If this GEP instruction doesn't move the pointer, just replace the GEP
- // with a bitcast of the real input to the dest type.
- if (Offset == 0) {
- // If the bitcast is of an allocation, and the allocation will be
- // converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isMalloc(BCI->getOperand(0))) {
- // See if the bitcast simplifies; if so, don't nuke this GEP yet.
- if (Instruction *I = visitBitCast(*BCI)) {
- if (I != BCI) {
- I->takeName(BCI);
- BCI->getParent()->getInstList().insert(BCI, I);
- ReplaceInstUsesWith(*BCI, I);
- }
- return &GEP;
- }
- }
- return new BitCastInst(BCI->getOperand(0), GEP.getType());
- }
-
- // Otherwise, if the offset is non-zero, we need to find out if there is a
- // field at Offset in 'A's type. If so, we can pull the cast through the
- // GEP.
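E.g. (hypothetical IR; the usual 32-bit layout puts field 1 of { i32, i32 }
at offset 4):

      %c = bitcast { i32, i32 }* %s to i8*
      %p = getelementptr i8* %c, i32 4

can be rewritten with the cast pulled through the GEP:

      %f = getelementptr { i32, i32 }* %s, i32 0, i32 1
      %p = bitcast i32* %f to i8*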
- SmallVector<Value*, 8> NewIndices; - const Type *InTy = - cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) { - Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); - - if (NGEP->getType() == GEP.getType()) - return ReplaceInstUsesWith(GEP, NGEP); - NGEP->takeName(&GEP); - return new BitCastInst(NGEP, GEP.getType()); - } - } - } - - return 0; -} - -Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { - // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 - if (AI.isArrayAllocation()) { // Check C != 1 - if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - const Type *NewTy = - ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); - New->setAlignment(AI.getAlignment()); - - // Scan to the end of the allocation instructions, to skip over a block of - // allocas if possible...also skip interleaved debug info - // - BasicBlock::iterator It = New; - while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; - - // Now that I is pointing to the first non-allocation-inst in the block, - // insert our getelementptr instruction... - // - Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context)); - Value *Idx[2]; - Idx[0] = NullIdx; - Idx[1] = NullIdx; - Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, - New->getName()+".sub", It); - - // Now make everything use the getelementptr instead of the original - // allocation. - return ReplaceInstUsesWith(AI, V); - } else if (isa<UndefValue>(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - } - } - - if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { - // If alloca'ing a zero byte object, replace the alloca with a null pointer. - // Note that we only do this for alloca's, because malloc should allocate - // and return a unique pointer, even for a zero byte allocation. - if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - - // If the alignment is 0 (unspecified), assign it the preferred alignment. - if (AI.getAlignment() == 0) - AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); - } - - return 0; -} - -Instruction *InstCombiner::visitFree(Instruction &FI) { - Value *Op = FI.getOperand(1); - - // free undef -> unreachable. - if (isa<UndefValue>(Op)) { - // Insert a new store to null because we cannot modify the CFG here. - new StoreInst(ConstantInt::getTrue(*Context), - UndefValue::get(Type::getInt1PtrTy(*Context)), &FI); - return EraseInstFromFunction(FI); - } - - // If we have 'free null' delete the instruction. This can happen in stl code - // when lots of inlining happens. - if (isa<ConstantPointerNull>(Op)) - return EraseInstFromFunction(FI); - - // If we have a malloc call whose only use is a free call, delete both. 
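That is (hypothetical IR): a pairing such as

      %m = call i8* @malloc(i32 %size)
      call void @free(i8* %m)

where %m has no other use leaves nothing behind; both calls are erased.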
- if (isMalloc(Op)) {
- if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
- if (Op->hasOneUse() && CI->hasOneUse()) {
- EraseInstFromFunction(FI);
- EraseInstFromFunction(*CI);
- return EraseInstFromFunction(*cast<Instruction>(Op));
- }
- } else {
- // Op is a call to malloc
- if (Op->hasOneUse()) {
- EraseInstFromFunction(FI);
- return EraseInstFromFunction(*cast<Instruction>(Op));
- }
- }
- }
-
- return 0;
-}
-
-/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
-static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
- const TargetData *TD) {
- User *CI = cast<User>(LI.getOperand(0));
- Value *CastOp = CI->getOperand(0);
- LLVMContext *Context = IC.getContext();
-
- const PointerType *DestTy = cast<PointerType>(CI->getType());
- const Type *DestPTy = DestTy->getElementType();
- if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
-
- // If the address spaces don't match, don't eliminate the cast.
- if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
- return 0;
-
- const Type *SrcPTy = SrcTy->getElementType();
-
- if (DestPTy->isInteger() || isa<PointerType>(DestPTy) ||
- isa<VectorType>(DestPTy)) {
- // If the source is an array, the code below will not succeed. Check to
- // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
- // constants.
- if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
- if (Constant *CSrc = dyn_cast<Constant>(CastOp))
- if (ASrcTy->getNumElements() != 0) {
- Value *Idxs[2];
- Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- Idxs[1] = Idxs[0];
- CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
- SrcTy = cast<PointerType>(CastOp->getType());
- SrcPTy = SrcTy->getElementType();
- }
-
- if (IC.getTargetData() &&
- (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||
- isa<VectorType>(SrcPTy)) &&
- // Do not allow turning this into a load of an integer, which is then
- // cast to a pointer; this pessimizes pointer analysis a lot.
- (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
- IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
- IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
-
- // Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before the load, cast
- // the result of the loaded value.
- Value *NewLoad =
- IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
- // Now cast the result of the load.
- return new BitCastInst(NewLoad, LI.getType());
- }
- }
- }
- return 0;
-}
-
-Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
- Value *Op = LI.getOperand(0);
-
- // Attempt to improve the alignment.
- if (TD) {
- unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
- if (KnownAlign >
- (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
- LI.getAlignment()))
- LI.setAlignment(KnownAlign);
- }
-
- // load (cast X) --> cast (load X) iff safe.
- if (isa<CastInst>(Op))
- if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
- return Res;
-
- // None of the following transforms are legal for volatile loads.
- if (LI.isVolatile()) return 0;
-
- // Do really simple store-to-load forwarding and load CSE, to catch cases
- // where there are several consecutive memory accesses to the same location,
- // separated by a few arithmetic operations.
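Sketch (hypothetical IR): within the small scan window used below,

      store i32 %v, i32* %p
      %a = add i32 %x, 1
      %r = load i32* %p

forwards the stored value, replacing %r with %v.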
-  BasicBlock::iterator BBI = &LI;
-  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
-    return ReplaceInstUsesWith(LI, AvailableVal);
-
-  // load(gep null, ...) -> unreachable
-  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
-    const Value *GEPI0 = GEPI->getOperand(0);
-    // TODO: Consider a target hook for valid address spaces for this xform.
-    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
-      // Insert a new store to null instruction before the load to indicate
-      // that this code is not reachable.  We do this instead of inserting
-      // an unreachable instruction directly because we cannot modify the
-      // CFG.
-      new StoreInst(UndefValue::get(LI.getType()),
-                    Constant::getNullValue(Op->getType()), &LI);
-      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
-    }
-  }
-
-  // load null/undef -> unreachable
-  // TODO: Consider a target hook for valid address spaces for this xform.
-  if (isa<UndefValue>(Op) ||
-      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
-    // Insert a new store to null instruction before the load to indicate that
-    // this code is not reachable.  We do this instead of inserting an
-    // unreachable instruction directly because we cannot modify the CFG.
-    new StoreInst(UndefValue::get(LI.getType()),
-                  Constant::getNullValue(Op->getType()), &LI);
-    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
-  }
-
-  // Instcombine load (constantexpr_cast global) -> cast (load global)
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
-    if (CE->isCast())
-      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
-        return Res;
-
-  if (Op->hasOneUse()) {
-    // Change select and PHI nodes to select values instead of addresses: this
-    // helps alias analysis out a lot, allows many other simplifications, and
-    // exposes redundancy in the code.
-    //
-    // Note that we cannot do the transformation unless we know that the
-    // introduced loads cannot trap!  Something like this is valid as long as
-    // the condition is always false: load (select bool %C, int* null, int* %G),
-    // but it would not be valid if we transformed it to load from null
-    // unconditionally.
-    //
-    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
-      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
-      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&
-          isSafeToLoadUnconditionally(SI->getOperand(2), SI)) {
-        Value *V1 = Builder->CreateLoad(SI->getOperand(1),
-                                        SI->getOperand(1)->getName()+".val");
-        Value *V2 = Builder->CreateLoad(SI->getOperand(2),
-                                        SI->getOperand(2)->getName()+".val");
-        return SelectInst::Create(SI->getCondition(), V1, V2);
-      }
-
-      // load (select (cond, null, P)) -> load P
-      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
-        if (C->isNullValue()) {
-          LI.setOperand(0, SI->getOperand(2));
-          return &LI;
-        }
-
-      // load (select (cond, P, null)) -> load P
-      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
-        if (C->isNullValue()) {
-          LI.setOperand(0, SI->getOperand(1));
-          return &LI;
-        }
-    }
-  }
-  return 0;
-}
-
-/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
-/// when possible.  This makes it generally easy to do alias analysis and/or
-/// SROA/mem2reg of the memory object.
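A sketch of this fold, including the "noop gep" into a first field that the function's own comment mentions (illustrative types; a 32-bit layout is assumed so the sizes match):

  %c = bitcast { i32, float }* %p to i32*
  store i32 %x, i32* %c
  ; becomes a store to the first field, with any needed cast applied to %x:
  %f = getelementptr inbounds { i32, float }* %p, i32 0, i32 0
  store i32 %x, i32* %f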
-static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
-  User *CI = cast<User>(SI.getOperand(1));
-  Value *CastOp = CI->getOperand(0);
-
-  const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
-  const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
-  if (SrcTy == 0) return 0;
-
-  const Type *SrcPTy = SrcTy->getElementType();
-
-  if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy))
-    return 0;
-
-  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
-  /// to its first element.  This allows us to handle things like:
-  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
-  /// on 32-bit hosts.
-  SmallVector<Value*, 4> NewGEPIndices;
-
-  // If the source is an array, the code below will not succeed.  Check to
-  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
-  // constants.
-  if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
-    // Index through pointer.
-    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext()));
-    NewGEPIndices.push_back(Zero);
-
-    while (1) {
-      if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
-        if (!STy->getNumElements()) /* Struct can be empty {} */
-          break;
-        NewGEPIndices.push_back(Zero);
-        SrcPTy = STy->getElementType(0);
-      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
-        NewGEPIndices.push_back(Zero);
-        SrcPTy = ATy->getElementType();
-      } else {
-        break;
-      }
-    }
-
-    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
-  }
-
-  if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy))
-    return 0;
-
-  // If the pointers point into different address spaces or if they point to
-  // values with different sizes, we can't do the transformation.
-  if (!IC.getTargetData() ||
-      SrcTy->getAddressSpace() !=
-        cast<PointerType>(CI->getType())->getAddressSpace() ||
-      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
-      IC.getTargetData()->getTypeSizeInBits(DestPTy))
-    return 0;
-
-  // Okay, we are casting from one integer or pointer type to another of
-  // the same size.  Instead of casting the pointer before
-  // the store, cast the value to be stored.
-  Value *NewCast;
-  Value *SIOp0 = SI.getOperand(0);
-  Instruction::CastOps opcode = Instruction::BitCast;
-  const Type* CastSrcTy = SIOp0->getType();
-  const Type* CastDstTy = SrcPTy;
-  if (isa<PointerType>(CastDstTy)) {
-    if (CastSrcTy->isInteger())
-      opcode = Instruction::IntToPtr;
-  } else if (isa<IntegerType>(CastDstTy)) {
-    if (isa<PointerType>(SIOp0->getType()))
-      opcode = Instruction::PtrToInt;
-  }
-
-  // SIOp0 is a pointer to aggregate and this is a store to the first field,
-  // emit a GEP to index into its first field.
-  if (!NewGEPIndices.empty())
-    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
-                                           NewGEPIndices.end());
-
-  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
-                                   SIOp0->getName()+".c");
-  return new StoreInst(NewCast, CastOp);
-}
-
-/// equivalentAddressValues - Test if A and B will obviously have the same
-/// value.  This includes recognizing that %t0 and %t1 will have the same
-/// value in code like this:
-///   %t0 = getelementptr @a, 0, 3
-///   store i32 0, i32* %t0
-///   %t1 = getelementptr @a, 0, 3
-///   %t2 = load i32* %t1
-///
-static bool equivalentAddressValues(Value *A, Value *B) {
-  // Test if the values are trivially equivalent.
-  if (A == B) return true;
-
-  // Test if the values come from identical arithmetic instructions.
-  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
-  // it's only used to compare two uses within the same basic block, which
-  // means that they'll always either have the same value or one of them
-  // will have an undefined value.
-  if (isa<BinaryOperator>(A) ||
-      isa<CastInst>(A) ||
-      isa<PHINode>(A) ||
-      isa<GetElementPtrInst>(A))
-    if (Instruction *BI = dyn_cast<Instruction>(B))
-      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
-        return true;
-
-  // Otherwise they may not be equivalent.
-  return false;
-}
-
-// If this instruction has two uses, one of which is a llvm.dbg.declare,
-// return the llvm.dbg.declare.
-DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
-  if (!V->hasNUses(2))
-    return 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
-      return DI;
-    if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
-      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
-        return DI;
-    }
-  }
-  return 0;
-}
-
-Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
-  Value *Val = SI.getOperand(0);
-  Value *Ptr = SI.getOperand(1);
-
-  // If the RHS is an alloca with a single use, zapify the store, making the
-  // alloca dead.
-  // If the RHS is an alloca with two uses, the other one being a
-  // llvm.dbg.declare, zapify the store and the declare, making the
-  // alloca dead.  We must do this to prevent declares from affecting
-  // codegen.
-  if (!SI.isVolatile()) {
-    if (Ptr->hasOneUse()) {
-      if (isa<AllocaInst>(Ptr)) {
-        EraseInstFromFunction(SI);
-        ++NumCombined;
-        return 0;
-      }
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
-        if (isa<AllocaInst>(GEP->getOperand(0))) {
-          if (GEP->getOperand(0)->hasOneUse()) {
-            EraseInstFromFunction(SI);
-            ++NumCombined;
-            return 0;
-          }
-          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
-            EraseInstFromFunction(*DI);
-            EraseInstFromFunction(SI);
-            ++NumCombined;
-            return 0;
-          }
-        }
-      }
-    }
-    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
-      EraseInstFromFunction(*DI);
-      EraseInstFromFunction(SI);
-      ++NumCombined;
-      return 0;
-    }
-  }
-
-  // Attempt to improve the alignment.
-  if (TD) {
-    unsigned KnownAlign =
-      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
-    if (KnownAlign >
-        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
-                                  SI.getAlignment()))
-      SI.setAlignment(KnownAlign);
-  }
-
-  // Do really simple DSE, to catch cases where there are several consecutive
-  // stores to the same location, separated by a few arithmetic operations.
-  // This situation often occurs with bitfield accesses.
-  BasicBlock::iterator BBI = &SI;
-  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
-       --ScanInsts) {
-    --BBI;
-    // Don't count debug info directives, lest they affect codegen,
-    // and we skip pointer-to-pointer bitcasts, which are NOPs.
-    // It is necessary for correctness to skip those that feed into a
-    // llvm.dbg.declare, as these are not present when debugging is off.
-    if (isa<DbgInfoIntrinsic>(BBI) ||
-        (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
-      ScanInsts++;
-      continue;
-    }
-
-    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
-      // Prev store isn't volatile, and stores to the same location?
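The "really simple DSE" described above catches sequences like this (hypothetical):

  store i32 1, i32* %p
  %x = add i32 %a, %b
  store i32 2, i32* %p
  ; the first store is dead and is erased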
- if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), - SI.getOperand(1))) { - ++NumDeadStore; - ++BBI; - EraseInstFromFunction(*PrevSI); - continue; - } - break; - } - - // If this is a load, we have to stop. However, if the loaded value is from - // the pointer we're loading and is producing the pointer we're storing, - // then *this* store is dead (X = load P; store X -> P). - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { - if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - !SI.isVolatile()) { - EraseInstFromFunction(SI); - ++NumCombined; - return 0; - } - // Otherwise, this is a load from some other location. Stores before it - // may not be dead. - break; - } - - // Don't skip over loads or things that can modify memory. - if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) - break; - } - - - if (SI.isVolatile()) return 0; // Don't hack volatile stores. - - // store X, null -> turns into 'unreachable' in SimplifyCFG - if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { - if (!isa<UndefValue>(Val)) { - SI.setOperand(0, UndefValue::get(Val->getType())); - if (Instruction *U = dyn_cast<Instruction>(Val)) - Worklist.Add(U); // Dropped a use. - ++NumCombined; - } - return 0; // Do not modify these! - } - - // store undef, Ptr -> noop - if (isa<UndefValue>(Val)) { - EraseInstFromFunction(SI); - ++NumCombined; - return 0; - } - - // If the pointer destination is a cast, see if we can fold the cast into the - // source instead. - if (isa<CastInst>(Ptr)) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) - if (CE->isCast()) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - - - // If this store is the last instruction in the basic block (possibly - // excepting debug info instructions and the pointer bitcasts that feed - // into them), and if the block ends with an unconditional branch, try - // to move it to the successor block. - BBI = &SI; - do { - ++BBI; - } while (isa<DbgInfoIntrinsic>(BBI) || - (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))); - if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) - if (BI->isUnconditional()) - if (SimplifyStoreAtEndOfBlock(SI)) - return 0; // xform done! - - return 0; -} - -/// SimplifyStoreAtEndOfBlock - Turn things like: -/// if () { *P = v1; } else { *P = v2 } -/// into a phi node with a store in the successor. -/// -/// Simplify things like: -/// *P = v1; if () { *P = v2; } -/// into a phi node with a store in the successor. -/// -bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { - BasicBlock *StoreBB = SI.getParent(); - - // Check to see if the successor block has exactly two incoming edges. If - // so, see if the other predecessor contains a store to the same location. - // if so, insert a PHI node (if needed) and move the stores down. - BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - - // Determine whether Dest has exactly two predecessors and, if so, compute - // the other predecessor. 
-  pred_iterator PI = pred_begin(DestBB);
-  BasicBlock *OtherBB = 0;
-  if (*PI != StoreBB)
-    OtherBB = *PI;
-  ++PI;
-  if (PI == pred_end(DestBB))
-    return false;
-
-  if (*PI != StoreBB) {
-    if (OtherBB)
-      return false;
-    OtherBB = *PI;
-  }
-  if (++PI != pred_end(DestBB))
-    return false;
-
-  // Bail out if all the relevant blocks aren't distinct (this can happen,
-  // for example, if SI is in an infinite loop)
-  if (StoreBB == DestBB || OtherBB == DestBB)
-    return false;
-
-  // Verify that the other block ends in a branch and is not otherwise empty.
-  BasicBlock::iterator BBI = OtherBB->getTerminator();
-  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
-  if (!OtherBr || BBI == OtherBB->begin())
-    return false;
-
-  // If the other block ends in an unconditional branch, check for the 'if then
-  // else' case: there is an instruction before the branch.
-  StoreInst *OtherStore = 0;
-  if (OtherBr->isUnconditional()) {
-    --BBI;
-    // Skip over debugging info.
-    while (isa<DbgInfoIntrinsic>(BBI) ||
-           (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
-      if (BBI==OtherBB->begin())
-        return false;
-      --BBI;
-    }
-    // If this isn't a store, isn't a store to the same location, or if the
-    // alignments differ, bail out.
-    OtherStore = dyn_cast<StoreInst>(BBI);
-    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
-        OtherStore->getAlignment() != SI.getAlignment())
-      return false;
-  } else {
-    // Otherwise, the other block ended with a conditional branch. If one of the
-    // destinations is StoreBB, then we have the if/then case.
-    if (OtherBr->getSuccessor(0) != StoreBB &&
-        OtherBr->getSuccessor(1) != StoreBB)
-      return false;
-
-    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
-    // if/then triangle.  See if there is a store to the same ptr as SI that
-    // lives in OtherBB.
-    for (;; --BBI) {
-      // Check to see if we find the matching store.
-      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
-        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
-            OtherStore->getAlignment() != SI.getAlignment())
-          return false;
-        break;
-      }
-      // If we find something that may be using or overwriting the stored
-      // value, or if we run out of instructions, we can't do the xform.
-      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
-          BBI == OtherBB->begin())
-        return false;
-    }
-
-    // In order to eliminate the store in OtherBB, we have to
-    // make sure nothing reads or overwrites the stored value in
-    // StoreBB.
-    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
-      // FIXME: This should really be AA driven.
-      if (I->mayReadFromMemory() || I->mayWriteToMemory())
-        return false;
-    }
-  }
-
-  // Insert a PHI node now if we need it.
-  Value *MergedVal = OtherStore->getOperand(0);
-  if (MergedVal != SI.getOperand(0)) {
-    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
-    PN->reserveOperandSpace(2);
-    PN->addIncoming(SI.getOperand(0), SI.getParent());
-    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
-    MergedVal = InsertNewInstBefore(PN, DestBB->front());
-  }
-
-  // Advance to a place where it is safe to insert the new store and
-  // insert it.
-  BBI = DestBB->getFirstNonPHI();
-  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
-                                    OtherStore->isVolatile(),
-                                    SI.getAlignment()), *BBI);
-
-  // Nuke the old stores.
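The if/then/else case this function handles, as a rough IR sketch (hypothetical block and value names):

  then:
    store i32 %v1, i32* %p
    br label %join
  else:
    store i32 %v2, i32* %p
    br label %join
  ; becomes a single store of a phi in the successor:
  join:
    %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
    store i32 %storemerge, i32* %p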
-  EraseInstFromFunction(SI);
-  EraseInstFromFunction(*OtherStore);
-  ++NumCombined;
-  return true;
-}
-
-
-Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
-  // Change br (not X), label True, label False to: br X, label False, True
-  Value *X = 0;
-  BasicBlock *TrueDest;
-  BasicBlock *FalseDest;
-  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
-      !isa<Constant>(X)) {
-    // Swap Destinations and condition...
-    BI.setCondition(X);
-    BI.setSuccessor(0, FalseDest);
-    BI.setSuccessor(1, TrueDest);
-    return &BI;
-  }
-
-  // Canonicalize fcmp_one -> fcmp_oeq
-  FCmpInst::Predicate FPred; Value *Y;
-  if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
-                      TrueDest, FalseDest)) &&
-      BI.getCondition()->hasOneUse())
-    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE ||
-        FPred == FCmpInst::FCMP_OGE) {
-      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition());
-      Cond->setPredicate(FCmpInst::getInversePredicate(FPred));
-
-      // Swap Destinations and condition.
-      BI.setSuccessor(0, FalseDest);
-      BI.setSuccessor(1, TrueDest);
-      Worklist.Add(Cond);
-      return &BI;
-    }
-
-  // Canonicalize icmp_ne -> icmp_eq
-  ICmpInst::Predicate IPred;
-  if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
-                      TrueDest, FalseDest)) &&
-      BI.getCondition()->hasOneUse())
-    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE ||
-        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE ||
-        IPred == ICmpInst::ICMP_SGE) {
-      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition());
-      Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
-      // Swap Destinations and condition.
-      BI.setSuccessor(0, FalseDest);
-      BI.setSuccessor(1, TrueDest);
-      Worklist.Add(Cond);
-      return &BI;
-    }
-
-  return 0;
-}
-
-Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
-  Value *Cond = SI.getCondition();
-  if (Instruction *I = dyn_cast<Instruction>(Cond)) {
-    if (I->getOpcode() == Instruction::Add)
-      if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
-        // change 'switch (X+4) case 1:' into 'switch (X) case -3'
-        for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)
-          SI.setOperand(i,
-                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),
-                                        AddRHS));
-        SI.setOperand(0, I->getOperand(0));
-        Worklist.Add(I);
-        return &SI;
-      }
-  }
-  return 0;
-}
-
-Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
-  Value *Agg = EV.getAggregateOperand();
-
-  if (!EV.hasIndices())
-    return ReplaceInstUsesWith(EV, Agg);
-
-  if (Constant *C = dyn_cast<Constant>(Agg)) {
-    if (isa<UndefValue>(C))
-      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
-
-    if (isa<ConstantAggregateZero>(C))
-      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
-
-    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
-      // Extract the element indexed by the first index out of the constant
-      Value *V = C->getOperand(*EV.idx_begin());
-      if (EV.getNumIndices() > 1)
-        // Extract the remaining indices out of the constant indexed by the
-        // first index
-        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
-      else
-        return ReplaceInstUsesWith(EV, V);
-    }
-    return 0; // Can't handle other constants
-  }
-  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
-    // We're extracting from an insertvalue instruction, compare the indices
-    const unsigned *exti, *exte, *insi, *inse;
-    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
-         exte = EV.idx_end(), inse = IV->idx_end();
-         exti != exte && insi != inse;
-         ++exti, ++insi) {
-      if (*insi !=
*exti) - // The insert and extract both reference distinctly different elements. - // This means the extract is not influenced by the insert, and we can - // replace the aggregate operand of the extract with the aggregate - // operand of the insert. i.e., replace - // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 - // %E = extractvalue { i32, { i32 } } %I, 0 - // with - // %E = extractvalue { i32, { i32 } } %A, 0 - return ExtractValueInst::Create(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); - } - if (exti == exte && insi == inse) - // Both iterators are at the end: Index lists are identical. Replace - // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 - // %C = extractvalue { i32, { i32 } } %B, 1, 0 - // with "i32 42" - return ReplaceInstUsesWith(EV, IV->getInsertedValueOperand()); - if (exti == exte) { - // The extract list is a prefix of the insert list. i.e. replace - // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 - // %E = extractvalue { i32, { i32 } } %I, 1 - // with - // %X = extractvalue { i32, { i32 } } %A, 1 - // %E = insertvalue { i32 } %X, i32 42, 0 - // by switching the order of the insert and extract (though the - // insertvalue should be left in, since it may have other uses). - Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); - return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), - insi, inse); - } - if (insi == inse) - // The insert list is a prefix of the extract list - // We can simply remove the common indices from the extract and make it - // operate on the inserted value instead of the insertvalue result. - // i.e., replace - // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 - // %E = extractvalue { i32, { i32 } } %I, 1, 0 - // with - // %E extractvalue { i32 } { i32 42 }, 0 - return ExtractValueInst::Create(IV->getInsertedValueOperand(), - exti, exte); - } - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { - // We're extracting from an intrinsic, see if we're the only user, which - // allows us to simplify multiple result intrinsics to simpler things that - // just get one value.. - if (II->hasOneUse()) { - // Check if we're grabbing the overflow bit or the result of a 'with - // overflow' intrinsic. If it's the latter we can remove the intrinsic - // and replace it with a traditional binary instruction. - switch (II->getIntrinsicID()) { - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); - II->replaceAllUsesWith(UndefValue::get(II->getType())); - EraseInstFromFunction(*II); - return BinaryOperator::CreateAdd(LHS, RHS); - } - break; - case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); - II->replaceAllUsesWith(UndefValue::get(II->getType())); - EraseInstFromFunction(*II); - return BinaryOperator::CreateSub(LHS, RHS); - } - break; - case Intrinsic::umul_with_overflow: - case Intrinsic::smul_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. - Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); - II->replaceAllUsesWith(UndefValue::get(II->getType())); - EraseInstFromFunction(*II); - return BinaryOperator::CreateMul(LHS, RHS); - } - break; - default: - break; - } - } - } - // Can't simplify extracts from other values. 
Note that nested extracts are
-  // already simplified implicitly by the above (extract ( extract (insert) )
-  // will be translated into extract ( insert ( extract ) ) first and then just
-  // the value inserted, if appropriate).
-  return 0;
-}
-
-/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation.
-static bool CheapToScalarize(Value *V, bool isConstant) {
-  if (isa<ConstantAggregateZero>(V))
-    return true;
-  if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
-    if (isConstant) return true;
-    // If all elts are the same, we can extract.
-    Constant *Op0 = C->getOperand(0);
-    for (unsigned i = 1; i < C->getNumOperands(); ++i)
-      if (C->getOperand(i) != Op0)
-        return false;
-    return true;
-  }
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return false;
-
-  // Insert element gets simplified to the inserted element or is deleted if
-  // this is a constant idx extract element and it's a constant idx insertelt.
-  if (I->getOpcode() == Instruction::InsertElement && isConstant &&
-      isa<ConstantInt>(I->getOperand(2)))
-    return true;
-  if (I->getOpcode() == Instruction::Load && I->hasOneUse())
-    return true;
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
-    if (BO->hasOneUse() &&
-        (CheapToScalarize(BO->getOperand(0), isConstant) ||
-         CheapToScalarize(BO->getOperand(1), isConstant)))
-      return true;
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (CI->hasOneUse() &&
-        (CheapToScalarize(CI->getOperand(0), isConstant) ||
-         CheapToScalarize(CI->getOperand(1), isConstant)))
-      return true;
-
-  return false;
-}
-
-/// Read and decode a shufflevector mask.
-///
-/// It turns undef elements into values that are larger than the number of
-/// elements in the input.
-static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
-  unsigned NElts = SVI->getType()->getNumElements();
-  if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
-    return std::vector<unsigned>(NElts, 0);
-  if (isa<UndefValue>(SVI->getOperand(2)))
-    return std::vector<unsigned>(NElts, 2*NElts);
-
-  std::vector<unsigned> Result;
-  const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
-  for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
-    if (isa<UndefValue>(*i))
-      Result.push_back(NElts*2);  // undef -> 2*NElts (out of range)
-    else
-      Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
-  return Result;
-}
-
-/// FindScalarElement - Given a vector and an element number, see if the scalar
-/// value is already around as a register, for example if it were inserted then
-/// extracted from the vector.
-static Value *FindScalarElement(Value *V, unsigned EltNo,
-                                LLVMContext *Context) {
-  assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
-  const VectorType *PTy = cast<VectorType>(V->getType());
-  unsigned Width = PTy->getNumElements();
-  if (EltNo >= Width)  // Out of range access.
-    return UndefValue::get(PTy->getElementType());
-
-  if (isa<UndefValue>(V))
-    return UndefValue::get(PTy->getElementType());
-  else if (isa<ConstantAggregateZero>(V))
-    return Constant::getNullValue(PTy->getElementType());
-  else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
-    return CP->getOperand(EltNo);
-  else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
-    // If this is an insert to a variable element, we don't know what it is.
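FindScalarElement chases constant-index inserts, as in this hypothetical sketch:

  %v1 = insertelement <4 x i32> %v0, i32 %x, i32 2
  %a  = extractelement <4 x i32> %v1, i32 2   ; the inserted value: folds to %x
  %b  = extractelement <4 x i32> %v1, i32 0   ; untouched by the insert: recurses into %v0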
-    if (!isa<ConstantInt>(III->getOperand(2)))
-      return 0;
-    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
-    // If this is an insert to the element we are looking for, return the
-    // inserted value.
-    if (EltNo == IIElt)
-      return III->getOperand(1);
-
-    // Otherwise, the insertelement doesn't modify the value, recurse on its
-    // vector input.
-    return FindScalarElement(III->getOperand(0), EltNo, Context);
-  } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
-    unsigned LHSWidth =
-      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-    unsigned InEl = getShuffleMask(SVI)[EltNo];
-    if (InEl < LHSWidth)
-      return FindScalarElement(SVI->getOperand(0), InEl, Context);
-    else if (InEl < LHSWidth*2)
-      return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context);
-    else
-      return UndefValue::get(PTy->getElementType());
-  }
-
-  // Otherwise, we don't know.
-  return 0;
-}
-
-Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-  // If vector val is undef, replace extract with scalar undef.
-  if (isa<UndefValue>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
-  // If vector val is constant 0, replace extract with scalar 0.
-  if (isa<ConstantAggregateZero>(EI.getOperand(0)))
-    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
-  if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
-    // If vector val is constant with all elements the same, replace EI with
-    // that element.  When the elements are not identical, we cannot replace yet
-    // (we do that below, but only when the index is constant).
-    Constant *op0 = C->getOperand(0);
-    for (unsigned i = 1; i != C->getNumOperands(); ++i)
-      if (C->getOperand(i) != op0) {
-        op0 = 0;
-        break;
-      }
-    if (op0)
-      return ReplaceInstUsesWith(EI, op0);
-  }
-
-  // If extracting a specified index from the vector, see if we can recursively
-  // find a previously computed scalar that was inserted into the vector.
-  if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
-    unsigned IndexVal = IdxC->getZExtValue();
-    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
-
-    // If this is extracting an invalid index, turn this into undef, to avoid
-    // crashing the code below.
-    if (IndexVal >= VectorWidth)
-      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
-    // This instruction only demands the single element from the input vector.
-    // If the input vector has a single use, simplify it based on this use
-    // property.
-    if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
-      APInt UndefElts(VectorWidth, 0);
-      APInt DemandedMask(VectorWidth, 1 << IndexVal);
-      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
-                                                DemandedMask, UndefElts)) {
-        EI.setOperand(0, V);
-        return &EI;
-      }
-    }
-
-    if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal, Context))
-      return ReplaceInstUsesWith(EI, Elt);
-
-    // If this extractelement is directly using a bitcast from a vector of
-    // the same number of elements, see if we can find the source element from
-    // it.  In this case, we will end up needing to bitcast the scalars.
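The bitcast case just mentioned, sketched with illustrative names (the element counts match, so only the recovered scalar needs a cast):

  %v1 = insertelement <4 x float> %v0, float %f, i32 1
  %bc = bitcast <4 x float> %v1 to <4 x i32>
  %e  = extractelement <4 x i32> %bc, i32 1
  ; the scalar is found through the bitcast, leaving:
  %e  = bitcast float %f to i32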
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { - if (const VectorType *VT = - dyn_cast<VectorType>(BCI->getOperand(0)->getType())) - if (VT->getNumElements() == VectorWidth) - if (Value *Elt = FindScalarElement(BCI->getOperand(0), - IndexVal, Context)) - return new BitCastInst(Elt, EI.getType()); - } - } - - if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { - // Push extractelement into predecessor operation if legal and - // profitable to do so - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - if (I->hasOneUse() && - CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { - Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); - Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); - return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); - } - } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { - // Extracting the inserted element? - if (IE->getOperand(2) == EI.getOperand(1)) - return ReplaceInstUsesWith(EI, IE->getOperand(1)); - // If the inserted and extracted elements are constants, they must not - // be the same value, extract from the pre-inserted value instead. - if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { - Worklist.AddValue(EI.getOperand(0)); - EI.setOperand(0, IE->getOperand(0)); - return &EI; - } - } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { - // If this is extracting an element from a shufflevector, figure out where - // it came from and extract from the appropriate input element instead. - if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { - unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; - Value *Src; - unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - - if (SrcIdx < LHSWidth) - Src = SVI->getOperand(0); - else if (SrcIdx < LHSWidth*2) { - SrcIdx -= LHSWidth; - Src = SVI->getOperand(1); - } else { - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - } - return ExtractElementInst::Create(Src, - ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx, - false)); - } - } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) - } - return 0; -} - -/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns -/// elements from either LHS or RHS, return the shuffle mask and true. -/// Otherwise, return false. -static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, - std::vector<Constant*> &Mask, - LLVMContext *Context) { - assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && - "Invalid CollectSingleShuffleElements"); - unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); - - if (isa<UndefValue>(V)) { - Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context))); - return true; - } else if (V == LHS) { - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i)); - return true; - } else if (V == RHS) { - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts)); - return true; - } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { - // If this is an insert of an extract from some other vector, include it. 
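The insert-of-extract case this comment introduces, as a small sketch (hypothetical 2-element vectors):

  %e  = extractelement <2 x i32> %rhs, i32 1
  %ie = insertelement <2 x i32> %lhs, i32 %e, i32 0
  ; %ie uses only elements of %lhs and %rhs; element 0 comes from
  ; %rhs index 1, so the collected mask is {2+1, 1} = {3, 1}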
-    Value *VecOp    = IEI->getOperand(0);
-    Value *ScalarOp = IEI->getOperand(1);
-    Value *IdxOp    = IEI->getOperand(2);
-
-    if (!isa<ConstantInt>(IdxOp))
-      return false;
-    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
-    if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
-      // Okay, we can handle this if the vector we are inserting into is
-      // transitively ok.
-      if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
-        // If so, update the mask to reflect the inserted undef.
-        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context));
-        return true;
-      }
-    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
-      if (isa<ConstantInt>(EI->getOperand(1)) &&
-          EI->getOperand(0)->getType() == V->getType()) {
-        unsigned ExtractedIdx =
-          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-
-        // This must be extracting from either LHS or RHS.
-        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
-          // Okay, we can handle this if the vector we are inserting into is
-          // transitively ok.
-          if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {
-            // If so, update the mask to reflect the inserted value.
-            if (EI->getOperand(0) == LHS) {
-              Mask[InsertedIdx % NumElts] =
-                ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);
-            } else {
-              assert(EI->getOperand(0) == RHS);
-              Mask[InsertedIdx % NumElts] =
-                ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts);
-
-            }
-            return true;
-          }
-        }
-      }
-    }
-  }
-  // TODO: Handle shufflevector here!
-
-  return false;
-}
-
-/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
-/// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask
-/// that computes V and the LHS value of the shuffle.
-static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
-                                     Value *&RHS, LLVMContext *Context) {
-  assert(isa<VectorType>(V->getType()) &&
-         (RHS == 0 || V->getType() == RHS->getType()) &&
-         "Invalid shuffle!");
-  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
-  if (isa<UndefValue>(V)) {
-    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));
-    return V;
-  } else if (isa<ConstantAggregateZero>(V)) {
-    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0));
-    return V;
-  } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
-    // If this is an insert of an extract from some other vector, include it.
-    Value *VecOp    = IEI->getOperand(0);
-    Value *ScalarOp = IEI->getOperand(1);
-    Value *IdxOp    = IEI->getOperand(2);
-
-    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
-      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
-          EI->getOperand(0)->getType() == V->getType()) {
-        unsigned ExtractedIdx =
-          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
-        // Either the extracted from or inserted into vector must be RHSVec,
-        // otherwise we'd end up with a shuffle of three inputs.
-        if (EI->getOperand(0) == RHS || RHS == 0) {
-          RHS = EI->getOperand(0);
-          Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context);
-          Mask[InsertedIdx % NumElts] =
-            ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx);
-          return V;
-        }
-
-        if (VecOp == RHS) {
-          Value *V = CollectShuffleElements(EI->getOperand(0), Mask,
-                                            RHS, Context);
-          // Everything but the extracted element is replaced with the RHS.
- for (unsigned i = 0; i != NumElts; ++i) { - if (i != InsertedIdx) - Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i); - } - return V; - } - - // If this insertelement is a chain that comes from exactly these two - // vectors, return the vector and the effective shuffle. - if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask, - Context)) - return EI->getOperand(0); - - } - } - } - // TODO: Handle shufflevector here! - - // Otherwise, can't do anything fancy. Return an identity vector. - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i)); - return V; -} - -Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { - Value *VecOp = IE.getOperand(0); - Value *ScalarOp = IE.getOperand(1); - Value *IdxOp = IE.getOperand(2); - - // Inserting an undef or into an undefined place, remove this. - if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) - ReplaceInstUsesWith(IE, VecOp); - - // If the inserted element was extracted from some other vector, and if the - // indexes are constant, try to turn this into a shufflevector operation. - if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { - if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) && - EI->getOperand(0)->getType() == IE.getType()) { - unsigned NumVectorElts = IE.getType()->getNumElements(); - unsigned ExtractedIdx = - cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); - unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); - - if (ExtractedIdx >= NumVectorElts) // Out of range extract. - return ReplaceInstUsesWith(IE, VecOp); - - if (InsertedIdx >= NumVectorElts) // Out of range insert. - return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); - - // If we are extracting a value from a vector, then inserting it right - // back into the same place, just use the input vector. - if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) - return ReplaceInstUsesWith(IE, VecOp); - - // If this insertelement isn't used by some other insertelement, turn it - // (and any insertelements it points to), into one big shuffle. - if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { - std::vector<Constant*> Mask; - Value *RHS = 0; - Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context); - if (RHS == 0) RHS = UndefValue::get(LHS->getType()); - // We now have a shuffle of LHS, RHS, Mask. - return new ShuffleVectorInst(LHS, RHS, - ConstantVector::get(Mask)); - } - } - } - - unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); - APInt UndefElts(VWidth, 0); - APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) - return &IE; - - return 0; -} - - -Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { - Value *LHS = SVI.getOperand(0); - Value *RHS = SVI.getOperand(1); - std::vector<unsigned> Mask = getShuffleMask(&SVI); - - bool MadeChange = false; - - // Undefined shuffle mask -> undefined value. 
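A sketch of what the insertelement chain handling above produces (hypothetical 2-element vectors):

  %e0 = extractelement <2 x i32> %a, i32 0
  %e1 = extractelement <2 x i32> %b, i32 1
  %i0 = insertelement <2 x i32> undef, i32 %e0, i32 0
  %i1 = insertelement <2 x i32> %i0, i32 %e1, i32 1
  ; the whole chain collapses to a single shuffle of the two sources:
  %i1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 3>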
-  if (isa<UndefValue>(SVI.getOperand(2)))
-    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
-
-  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
-
-  if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
-    return 0;
-
-  APInt UndefElts(VWidth, 0);
-  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-  if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
-    LHS = SVI.getOperand(0);
-    RHS = SVI.getOperand(1);
-    MadeChange = true;
-  }
-
-  // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask')
-  // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
-  if (LHS == RHS || isa<UndefValue>(LHS)) {
-    if (isa<UndefValue>(LHS) && LHS == RHS) {
-      // shuffle(undef, undef, mask) -> undef.
-      return ReplaceInstUsesWith(SVI, LHS);
-    }
-
-    // Remap any references to RHS to use LHS.
-    std::vector<Constant*> Elts;
-    for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-      if (Mask[i] >= 2*e)
-        Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
-      else {
-        if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
-            (Mask[i] <  e && isa<UndefValue>(LHS))) {
-          Mask[i] = 2*e;     // Turn into undef.
-          Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
-        } else {
-          Mask[i] = Mask[i] % e;  // Force to LHS.
-          Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i]));
-        }
-      }
-    }
-    SVI.setOperand(0, SVI.getOperand(1));
-    SVI.setOperand(1, UndefValue::get(RHS->getType()));
-    SVI.setOperand(2, ConstantVector::get(Elts));
-    LHS = SVI.getOperand(0);
-    RHS = SVI.getOperand(1);
-    MadeChange = true;
-  }
-
-  // Analyze the shuffle: is the LHS or RHS an identity shuffle?
-  bool isLHSID = true, isRHSID = true;
-
-  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-    if (Mask[i] >= e*2) continue;  // Ignore undef values.
-    // Is this an identity shuffle of the LHS value?
-    isLHSID &= (Mask[i] == i);
-
-    // Is this an identity shuffle of the RHS value?
-    isRHSID &= (Mask[i]-e == i);
-  }
-
-  // Eliminate identity shuffles.
-  if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
-  if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
-
-  // If the LHS is a shufflevector itself, see if we can combine it with this
-  // one without producing an unusual shuffle.  Here we are really conservative:
-  // we are absolutely afraid of producing a shuffle mask not in the input
-  // program, because the code gen may not be smart enough to turn a merged
-  // shuffle into two specific shuffles: it may produce worse code.  As such,
-  // we only merge two shuffles if the result is one of the two input shuffle
-  // masks.  In this case, merging the shuffles just removes one instruction,
-  // which we know is safe.  This is good for things like turning:
-  //   (splat(splat)) -> splat.
-  if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
-    if (isa<UndefValue>(RHS)) {
-      std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
-
-      if (LHSMask.size() == Mask.size()) {
-        std::vector<unsigned> NewMask;
-        for (unsigned i = 0, e = Mask.size(); i != e; ++i)
-          if (Mask[i] >= e)
-            NewMask.push_back(2*e);
-          else
-            NewMask.push_back(LHSMask[Mask[i]]);
-
-        // If the result mask is equal to the src shuffle or this
-        // shuffle mask, do the replacement.
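For instance, the splat-of-a-splat case the comment mentions (illustrative values):

  %s1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> zeroinitializer
  ; NewMask equals the input shuffle's mask, so the pair becomes one splat:
  %s2 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer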
- if (NewMask == LHSMask || NewMask == Mask) { - unsigned LHSInNElts = - cast<VectorType>(LHSSVI->getOperand(0)->getType())-> - getNumElements(); - std::vector<Constant*> Elts; - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); - } else { - Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), - NewMask[i])); - } - } - return new ShuffleVectorInst(LHSSVI->getOperand(0), - LHSSVI->getOperand(1), - ConstantVector::get(Elts)); - } - } - } - } - - return MadeChange ? &SVI : 0; -} - - - - -/// TryToSinkInstruction - Try to move the specified instruction from its -/// current block into the beginning of DestBlock, which can only happen if it's -/// safe to move the instruction past all of the instructions between it and the -/// end of its block. -static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { - assert(I->hasOneUse() && "Invariants didn't hold!"); - - // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa<PHINode>(I) || I->mayHaveSideEffects() || isa<TerminatorInst>(I)) - return false; - - // Do not sink alloca instructions out of the entry block. - if (isa<AllocaInst>(I) && I->getParent() == - &DestBlock->getParent()->getEntryBlock()) - return false; - - // We can only sink load instructions if there is nothing between the load and - // the end of block that could change the value. - if (I->mayReadFromMemory()) { - for (BasicBlock::iterator Scan = I, E = I->getParent()->end(); - Scan != E; ++Scan) - if (Scan->mayWriteToMemory()) - return false; - } - - BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI(); - - CopyPrecedingStopPoint(I, InsertPos); - I->moveBefore(InsertPos); - ++NumSunkInst; - return true; -} - - -/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding -/// all reachable code to the worklist. -/// -/// This has a couple of tricks to make the code faster and more powerful. In -/// particular, we constant fold and DCE instructions as we go, to avoid adding -/// them to the worklist (this significantly speeds up instcombine on code where -/// many instructions are dead or constant). Additionally, if we find a branch -/// whose condition is a known constant, we only visit the reachable successors. -/// -static bool AddReachableCodeToWorklist(BasicBlock *BB, - SmallPtrSet<BasicBlock*, 64> &Visited, - InstCombiner &IC, - const TargetData *TD) { - bool MadeIRChange = false; - SmallVector<BasicBlock*, 256> Worklist; - Worklist.push_back(BB); - - std::vector<Instruction*> InstrsForInstCombineWorklist; - InstrsForInstCombineWorklist.reserve(128); - - SmallPtrSet<ConstantExpr*, 64> FoldedConstants; - - while (!Worklist.empty()) { - BB = Worklist.back(); - Worklist.pop_back(); - - // We have now visited this block! If we've already been here, ignore it. - if (!Visited.insert(BB)) continue; - - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *Inst = BBI++; - - // DCE instruction if trivially dead. - if (isInstructionTriviallyDead(Inst)) { - ++NumDeadInst; - DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); - Inst->eraseFromParent(); - continue; - } - - // ConstantProp instruction if trivially constant. 
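A tiny illustration of the folding done while populating the worklist (hypothetical values):

  %a = add i32 2, 3     ; trivially constant: replaced by 5 and erased
  %b = mul i32 %x, %y   ; if it has no uses: counted as dead and erased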
- if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { - DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " - << *Inst << '\n'); - Inst->replaceAllUsesWith(C); - ++NumConstProp; - Inst->eraseFromParent(); - continue; - } - - - - if (TD) { - // See if we can constant fold its operands. - for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); - i != e; ++i) { - ConstantExpr *CE = dyn_cast<ConstantExpr>(i); - if (CE == 0) continue; - - // If we already folded this constant, don't try again. - if (!FoldedConstants.insert(CE)) - continue; - - Constant *NewC = ConstantFoldConstantExpression(CE, TD); - if (NewC && NewC != CE) { - *i = NewC; - MadeIRChange = true; - } - } - } - - - InstrsForInstCombineWorklist.push_back(Inst); - } - - // Recursively visit successors. If this is a branch or switch on a - // constant, only visit the reachable successor. - TerminatorInst *TI = BB->getTerminator(); - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) { - bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue(); - BasicBlock *ReachableBB = BI->getSuccessor(!CondVal); - Worklist.push_back(ReachableBB); - continue; - } - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { - if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) { - // See if this is an explicit destination. - for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) - if (SI->getCaseValue(i) == Cond) { - BasicBlock *ReachableBB = SI->getSuccessor(i); - Worklist.push_back(ReachableBB); - continue; - } - - // Otherwise it is the default destination. - Worklist.push_back(SI->getSuccessor(0)); - continue; - } - } - - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - Worklist.push_back(TI->getSuccessor(i)); - } - - // Once we've found all of the instructions to add to instcombine's worklist, - // add them in reverse order. This way instcombine will visit from the top - // of the function down. This jives well with the way that it adds all uses - // of instructions to the worklist after doing a transformation, thus avoiding - // some N^2 behavior in pathological cases. - IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], - InstrsForInstCombineWorklist.size()); - - return MadeIRChange; -} - -bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { - MadeIRChange = false; - - DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " - << F.getNameStr() << "\n"); - - { - // Do a depth-first traversal of the function, populate the worklist with - // the reachable instructions. Ignore blocks that are not reachable. Keep - // track of which blocks we visit. - SmallPtrSet<BasicBlock*, 64> Visited; - MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); - - // Do a quick scan over the function. If we find any blocks that are - // unreachable, remove any instructions inside of them. This prevents - // the instcombine code from having to deal with some bad special cases. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (!Visited.count(BB)) { - Instruction *Term = BB->getTerminator(); - while (Term != BB->begin()) { // Remove instrs bottom-up - BasicBlock::iterator I = Term; --I; - - DEBUG(errs() << "IC: DCE: " << *I << '\n'); - // A debug intrinsic shouldn't force another iteration if we weren't - // going to do one without it. 
-        if (!isa<DbgInfoIntrinsic>(I)) {
-          ++NumDeadInst;
-          MadeIRChange = true;
-        }
-
-        // If I is not of void type, then replaceAllUsesWith undef.
-        // This allows ValueHandlers and custom metadata to adjust themselves.
-        if (!I->getType()->isVoidTy())
-          I->replaceAllUsesWith(UndefValue::get(I->getType()));
-        I->eraseFromParent();
-      }
-    }
-  }
-
-  while (!Worklist.isEmpty()) {
-    Instruction *I = Worklist.RemoveOne();
-    if (I == 0) continue;  // skip null values.
-
-    // Check to see if we can DCE the instruction.
-    if (isInstructionTriviallyDead(I)) {
-      DEBUG(errs() << "IC: DCE: " << *I << '\n');
-      EraseInstFromFunction(*I);
-      ++NumDeadInst;
-      MadeIRChange = true;
-      continue;
-    }
-
-    // Instruction isn't dead, see if we can constant propagate it.
-    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
-      if (Constant *C = ConstantFoldInstruction(I, TD)) {
-        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
-
-        // Add operands to the worklist.
-        ReplaceInstUsesWith(*I, C);
-        ++NumConstProp;
-        EraseInstFromFunction(*I);
-        MadeIRChange = true;
-        continue;
-      }
-
-    // See if we can trivially sink this instruction to a successor basic block.
-    if (I->hasOneUse()) {
-      BasicBlock *BB = I->getParent();
-      Instruction *UserInst = cast<Instruction>(I->use_back());
-      BasicBlock *UserParent;
-
-      // Get the block the use occurs in.
-      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
-        UserParent = PN->getIncomingBlock(I->use_begin().getUse());
-      else
-        UserParent = UserInst->getParent();
-
-      if (UserParent != BB) {
-        bool UserIsSuccessor = false;
-        // See if the user is one of our successors.
-        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
-          if (*SI == UserParent) {
-            UserIsSuccessor = true;
-            break;
-          }
-
-        // If the user is one of our immediate successors, and if that successor
-        // only has us as a predecessor (we'd have to split the critical edge
-        // otherwise), we can keep going.
-        if (UserIsSuccessor && UserParent->getSinglePredecessor())
-          // Okay, the CFG is simple enough, try to sink this instruction.
-          MadeIRChange |= TryToSinkInstruction(I, UserParent);
-      }
-    }
-
-    // Now that we have an instruction, try combining it to simplify it.
-    Builder->SetInsertPoint(I->getParent(), I);
-
-#ifndef NDEBUG
-    std::string OrigI;
-#endif
-    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
-    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
-
-    if (Instruction *Result = visit(*I)) {
-      ++NumCombined;
-      // Should we replace the old instruction with a new one?
-      if (Result != I) {
-        DEBUG(errs() << "IC: Old = " << *I << '\n'
-                     << "    New = " << *Result << '\n');
-
-        // Everything uses the new instruction now.
-        I->replaceAllUsesWith(Result);
-
-        // Push the new instruction and any users onto the worklist.
-        Worklist.Add(Result);
-        Worklist.AddUsersToWorkList(*Result);
-
-        // Move the name to the new instruction first.
-        Result->takeName(I);
-
-        // Insert the new instruction into the basic block...
-        BasicBlock *InstParent = I->getParent();
-        BasicBlock::iterator InsertPos = I;
-
-        if (!isa<PHINode>(Result))        // If combining a PHI, don't insert
-          while (isa<PHINode>(InsertPos)) // into the middle of a block of PHIs.
-            ++InsertPos;
-
-        InstParent->getInstList().insert(InsertPos, Result);
-
-        EraseInstFromFunction(*I);
-      } else {
-#ifndef NDEBUG
-        DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
-                     << "    New = " << *I << '\n');
-#endif
-
-        // If the instruction was modified, it's possible that it is now dead.
-        // If so, remove it.
- if (isInstructionTriviallyDead(I)) { - EraseInstFromFunction(*I); - } else { - Worklist.Add(I); - Worklist.AddUsersToWorkList(*I); - } - } - MadeIRChange = true; - } - } - - Worklist.Zap(); - return MadeIRChange; -} - - -bool InstCombiner::runOnFunction(Function &F) { - MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - Context = &F.getContext(); - TD = getAnalysisIfAvailable<TargetData>(); - - - /// Builder - This is an IRBuilder that automatically inserts new - /// instructions into the worklist when they are created. - IRBuilder<true, TargetFolder, InstCombineIRInserter> - TheBuilder(F.getContext(), TargetFolder(TD), - InstCombineIRInserter(Worklist)); - Builder = &TheBuilder; - - bool EverMadeChange = false; - - // Iterate while there is work to do. - unsigned Iteration = 0; - while (DoOneIteration(F, Iteration++)) - EverMadeChange = true; - - Builder = 0; - return EverMadeChange; -} - -FunctionPass *llvm::createInstructionCombiningPass() { - return new InstCombiner(); -} diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 7e6cf79..9531311 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -89,7 +89,7 @@ namespace { bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs, BasicBlock *SuccBB); bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, - BasicBlock *PredBB); + const SmallVectorImpl<BasicBlock *> &PredBBs); typedef SmallVectorImpl<std::pair<ConstantInt*, BasicBlock*> > PredValueInfo; @@ -102,7 +102,8 @@ namespace { bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); - bool ProcessJumpOnPHI(PHINode *PN); + bool ProcessBranchOnPHI(PHINode *PN); + bool ProcessBranchOnXOR(BinaryOperator *BO); bool SimplifyPartiallyRedundantLoad(LoadInst *LI); }; @@ -118,16 +119,15 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } /// runOnFunction - Top level algorithm. /// bool JumpThreading::runOnFunction(Function &F) { - DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n"); + DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TD = getAnalysisIfAvailable<TargetData>(); LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0; FindLoopHeaders(F); - bool AnotherIteration = true, EverChanged = false; - while (AnotherIteration) { - AnotherIteration = false; - bool Changed = false; + bool Changed, EverChanged = false; + do { + Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { BasicBlock *BB = I; // Thread all of the branches we can over this block. @@ -140,7 +140,7 @@ bool JumpThreading::runOnFunction(Function &F) { // edges which simplifies the CFG. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { - DEBUG(errs() << " JT: Deleting dead block '" << BB->getName() + DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); DeleteDeadBlock(BB); @@ -176,9 +176,8 @@ bool JumpThreading::runOnFunction(Function &F) { } } } - AnotherIteration = Changed; EverChanged |= Changed; - } + } while (Changed); LoopHeaders.clear(); return EverChanged; @@ -490,7 +489,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // terminator to an unconditional branch. This can occur due to threading in // other blocks. 
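For instance, a terminator whose condition has become a constant is simply folded (illustrative):

  br i1 true, label %t, label %f
  ; ConstantFoldTerminator turns this into:
  br label %t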
if (isa<ConstantInt>(Condition)) { - DEBUG(errs() << " In block '" << BB->getName() + DEBUG(dbgs() << " In block '" << BB->getName() << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(BB); @@ -509,7 +508,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD); } - DEBUG(errs() << " In block '" << BB->getName() + DEBUG(dbgs() << " In block '" << BB->getName() << "' folding undef terminator: " << *BBTerm << '\n'); BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); @@ -552,11 +551,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } - // See if this is a phi node in the current block. - if (PHINode *PN = dyn_cast<PHINode>(CondInst)) - if (PN->getParent() == BB) - return ProcessJumpOnPHI(PN); - if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { if (!LVI && (!isa<PHINode>(CondCmp->getOperand(0)) || @@ -585,8 +579,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // we see one, check to see if it's partially redundant. If so, insert a PHI // which can then be used to thread the values. // - // This is particularly important because reg2mem inserts loads and stores all - // over the place, and this blocks jump threading if we don't zap them. Value *SimplifyValue = CondInst; if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) if (isa<Constant>(CondCmp->getOperand(1))) @@ -606,9 +598,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { if (ProcessThreadableEdges(CondInst, BB)) return true; + // If this is an otherwise-unfoldable branch on a phi node in the current + // block, see if we can simplify. + if (PHINode *PN = dyn_cast<PHINode>(CondInst)) + if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator())) + return ProcessBranchOnPHI(PN); + + + // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify. + if (CondInst->getOpcode() == Instruction::Xor && + CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator())) + return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst)); + // TODO: If we have: "br (X > 0)" and we have a predecessor where we know - // "(X == 4)" thread through this block. + // "(X == 4)", thread through this block. return false; } @@ -636,7 +640,7 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, else if (PredBI->getSuccessor(0) != BB) BranchDir = false; else { - DEBUG(errs() << " In block '" << PredBB->getName() + DEBUG(dbgs() << " In block '" << PredBB->getName() << "' folding terminator: " << *PredBB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(PredBB); @@ -648,7 +652,7 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, // If the dest block has one predecessor, just fix the branch condition to a // constant and fold it. if (BB->getSinglePredecessor()) { - DEBUG(errs() << " In block '" << BB->getName() + DEBUG(dbgs() << " In block '" << BB->getName() << "' folding condition to '" << BranchDir << "': " << *BB->getTerminator() << '\n'); ++NumFolds; @@ -727,8 +731,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, // Otherwise, we're safe to make the change. Make sure that the edge from // DestSI to DestSucc is not critical and has no PHI nodes. 
- DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI); - DEBUG(errs() << "THROUGH: " << *DestSI); + DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI); + DEBUG(dbgs() << "THROUGH: " << *DestSI); // If the destination has PHI nodes, just split the edge for updating // simplicity. @@ -979,14 +983,14 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); - DEBUG(errs() << "IN BB: " << *BB; + DEBUG(dbgs() << "IN BB: " << *BB; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { - errs() << " BB '" << BB->getName() << "': FOUND condition = "; + dbgs() << " BB '" << BB->getName() << "': FOUND condition = "; if (PredValues[i].first) - errs() << *PredValues[i].first; + dbgs() << *PredValues[i].first; else - errs() << "UNDEF"; - errs() << " for pred '" << PredValues[i].second->getName() + dbgs() << "UNDEF"; + dbgs() << " for pred '" << PredValues[i].second->getName() << "'.\n"; }); @@ -1070,36 +1074,110 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { return ThreadEdge(BB, PredsToFactor, MostPopularDest); } -/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in -/// the current block. See if there are any simplifications we can do based on -/// inputs to the phi node. +/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on +/// a PHI node in the current block. See if there are any simplifications we +/// can do based on inputs to the phi node. /// -bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { +bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) { BasicBlock *BB = PN->getParent(); - // If any of the predecessor blocks end in an unconditional branch, we can - // *duplicate* the jump into that block in order to further encourage jump - // threading and to eliminate cases where we have branch on a phi of an icmp - // (branch on icmp is much better). - - // We don't want to do this tranformation for switches, because we don't - // really want to duplicate a switch. - if (isa<SwitchInst>(BB->getTerminator())) - return false; + // TODO: We could make use of this to do it once for blocks with common PHI + // values. + SmallVector<BasicBlock*, 1> PredBBs; + PredBBs.resize(1); - // Look for unconditional branch predecessors. + // If any of the predecessor blocks end in an unconditional branch, we can + // *duplicate* the conditional branch into that block in order to further + // encourage jump threading and to eliminate cases where we have branch on a + // phi of an icmp (branch on icmp is much better). for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = PN->getIncomingBlock(i); if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator())) - if (PredBr->isUnconditional() && - // Try to duplicate BB into PredBB. - DuplicateCondBranchOnPHIIntoPred(BB, PredBB)) - return true; + if (PredBr->isUnconditional()) { + PredBBs[0] = PredBB; + // Try to duplicate BB into PredBB. + if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs)) + return true; + } } return false; } +/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on +/// a xor instruction in the current block. See if there are any +/// simplifications we can do based on inputs to the xor. 
+/// +bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { + BasicBlock *BB = BO->getParent(); + + // If either the LHS or RHS of the xor is a constant, don't do this + // optimization. + if (isa<ConstantInt>(BO->getOperand(0)) || + isa<ConstantInt>(BO->getOperand(1))) + return false; + + // If we have a xor as the branch input to this block, and we know that the + // LHS or RHS of the xor in any predecessor is true/false, then we can clone + // the condition into the predecessor and fix that value to true, saving some + // logical ops on that path and encouraging other paths to simplify. + // + // This copies something like this: + // + // BB: + // %X = phi i1 [1], [%X'] + // %Y = icmp eq i32 %A, %B + // %Z = xor i1 %X, %Y + // br i1 %Z, ... + // + // Into: + // BB': + // %Y = icmp ne i32 %A, %B + // br i1 %Y, ... + + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues; + bool isLHS = true; + if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) { + assert(XorOpValues.empty()); + if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues)) + return false; + isLHS = false; + } + + assert(!XorOpValues.empty() && + "ComputeValueKnownInPredecessors returned true with no values"); + + // Scan the information to see which is most popular: true or false. The + // predecessors can be of the set true, false, or undef. + unsigned NumTrue = 0, NumFalse = 0; + for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { + if (!XorOpValues[i].first) continue; // Ignore undefs for the count. + if (XorOpValues[i].first->isZero()) + ++NumFalse; + else + ++NumTrue; + } + + // Determine which value to split on, true, false, or undef if neither. + ConstantInt *SplitVal = 0; + if (NumTrue > NumFalse) + SplitVal = ConstantInt::getTrue(BB->getContext()); + else if (NumTrue != 0 || NumFalse != 0) + SplitVal = ConstantInt::getFalse(BB->getContext()); + + // Collect all of the blocks that this can be folded into so that we can + // factor this once and clone it once. + SmallVector<BasicBlock*, 8> BlocksToFoldInto; + for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) { + if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue; + + BlocksToFoldInto.push_back(XorOpValues[i].second); + } + + // Try to duplicate BB into PredBB. + return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto); +} + /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// predecessor to the PHIBB block. If it has PHI nodes, add entries for @@ -1133,7 +1211,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. if (SuccBB == BB) { - DEBUG(errs() << " Not threading across BB '" << BB->getName() + DEBUG(dbgs() << " Not threading across BB '" << BB->getName() << "' - would thread to self!\n"); return false; } @@ -1141,7 +1219,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats.
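The SplitVal selection above is a three-way vote over the predecessors: known-true and known-false predecessors are counted, undef predecessors abstain (and later join whichever camp was chosen), and false wins ties. A standalone restatement of that rule, with a hypothetical pickSplitValue helper in place of the LLVM types:

#include <cstdio>
#include <optional>
#include <vector>

enum class Known { False, True, Undef };

// Mirrors the SplitVal logic above: count known true/false predecessors,
// let false win ties, and report "no preference" only if all were undef.
static std::optional<bool> pickSplitValue(const std::vector<Known> &Preds) {
  unsigned NumTrue = 0, NumFalse = 0;
  for (Known K : Preds) {
    if (K == Known::Undef) continue; // undefs don't vote
    if (K == Known::True) ++NumTrue; else ++NumFalse;
  }
  if (NumTrue > NumFalse) return true;
  if (NumTrue != 0 || NumFalse != 0) return false;
  return std::nullopt;
}

int main() {
  std::vector<Known> Preds = {Known::True, Known::Undef, Known::True};
  if (std::optional<bool> V = pickSplitValue(Preds))
    std::printf("split on %s\n", *V ? "true" : "false"); // prints "true"
}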
if (LoopHeaders.count(BB)) { - DEBUG(errs() << " Not threading across loop header BB '" << BB->getName() + DEBUG(dbgs() << " Not threading across loop header BB '" << BB->getName() << "' to dest BB '" << SuccBB->getName() << "' - it might create an irreducible loop!\n"); return false; @@ -1149,7 +1227,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); if (JumpThreadCost > Threshold) { - DEBUG(errs() << " Not threading BB '" << BB->getName() + DEBUG(dbgs() << " Not threading BB '" << BB->getName() << "' - Cost is too high: " << JumpThreadCost << "\n"); return false; } @@ -1159,14 +1237,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, if (PredBBs.size() == 1) PredBB = PredBBs[0]; else { - DEBUG(errs() << " Factoring out " << PredBBs.size() + DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), ".thr_comm", this); } // And finally, do it! - DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '" + DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '" << SuccBB->getName() << "' with cost: " << JumpThreadCost << ", across block:\n " << *BB << "\n"); @@ -1235,7 +1313,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks @@ -1246,7 +1324,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); - DEBUG(errs() << "\n"); + DEBUG(dbgs() << "\n"); } @@ -1263,20 +1341,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - BI = NewBB->begin(); - for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { - Instruction *Inst = BI++; - - if (Value *V = SimplifyInstruction(Inst, TD)) { - WeakVH BIHandle(BI); - ReplaceAndSimplifyAllUses(Inst, V, TD); - if (BIHandle == 0) - BI = NewBB->begin(); - continue; - } - - RecursivelyDeleteTriviallyDeadInstructions(Inst); - } + SimplifyInstructionsInBlock(NewBB, TD); // Threaded an edge! ++NumThreads; @@ -1289,30 +1354,52 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, /// improves the odds that the branch will be on an analyzable instruction like /// a compare. bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, - BasicBlock *PredBB) { + const SmallVectorImpl<BasicBlock *> &PredBBs) { + assert(!PredBBs.empty() && "Can't handle an empty set"); + // If BB is a loop header, then duplicating this block outside the loop would // cause us to transform this into an irreducible loop, don't do this. // See the comments above FindLoopHeaders for justifications and caveats. 
if (LoopHeaders.count(BB)) { - DEBUG(errs() << " Not duplicating loop header '" << BB->getName() - << "' into predecessor block '" << PredBB->getName() + DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName() + << "' into predecessor block '" << PredBBs[0]->getName() << "' - it might create an irreducible loop!\n"); return false; } unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); if (DuplicationCost > Threshold) { - DEBUG(errs() << " Not duplicating BB '" << BB->getName() + DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() << "' - Cost is too high: " << DuplicationCost << "\n"); return false; } + // And finally, do it! Start by factoring the predecessors if needed. + BasicBlock *PredBB; + if (PredBBs.size() == 1) + PredBB = PredBBs[0]; + else { + DEBUG(dbgs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); + PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), + ".thr_comm", this); + } + // Okay, we decided to do this! Clone all the instructions in BB onto the end // of PredBB. - DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '" + DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '" << PredBB->getName() << "' to eliminate branch on phi. Cost: " << DuplicationCost << " block is:" << *BB << "\n"); + // Unless PredBB ends with an unconditional branch, split the edge so that we + // can just clone the bits from BB into the end of the new PredBB. + BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); + + if (!OldPredBranch->isUnconditional()) { + PredBB = SplitEdge(PredBB, BB, this); + OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); + } + // We are going to have to map operands from the original BB block into the // PredBB block. Evaluate PHI nodes in BB. DenseMap<Instruction*, Value*> ValueMapping; @@ -1321,15 +1408,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); - // Clone the non-phi instructions of BB into PredBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. for (; BI != BB->end(); ++BI) { Instruction *New = BI->clone(); - New->setName(BI->getName()); - PredBB->getInstList().insert(OldPredBranch, New); - ValueMapping[BI] = New; // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) @@ -1338,6 +1420,19 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (I != ValueMapping.end()) New->setOperand(i, I->second); } + + // If this instruction can be simplified after the operands are updated, + // just use the simplified value instead. This frequently happens due to + // phi translation. + if (Value *IV = SimplifyInstruction(New, TD)) { + delete New; + ValueMapping[BI] = IV; + } else { + // Otherwise, insert the new instruction into the block. + New->setName(BI->getName()); + PredBB->getInstList().insert(OldPredBranch, New); + ValueMapping[BI] = New; + } } // Check to see if the targets of the branch had PHI nodes.
If so, we need to @@ -1373,7 +1468,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (UsesToRename.empty()) continue; - DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n"); // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks @@ -1384,7 +1479,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); - DEBUG(errs() << "\n"); + DEBUG(dbgs() << "\n"); } // PredBB no longer jumps to BB, remove entries in the PHI node for the edge diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 99f3ae0..81f9ae6 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -384,10 +384,6 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { Size = AA->getTypeStoreSize(LI->getType()); return !pointerInvalidatedByLoop(LI->getOperand(0), Size); } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { - if (isa<DbgStopPointInst>(CI)) { - // Don't hoist/sink dbgstoppoints, we handle them separately - return false; - } // Handle obvious cases efficiently. AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == AliasAnalysis::DoesNotAccessMemory) @@ -461,7 +457,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) { /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { - DEBUG(errs() << "LICM sinking instruction: " << I); + DEBUG(dbgs() << "LICM sinking instruction: " << I); SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); @@ -603,7 +599,7 @@ void LICM::sink(Instruction &I) { /// that is safe to hoist, this instruction is called to do the dirty work. /// void LICM::hoist(Instruction &I) { - DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": " + DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); // Remove the instruction from its current basic block... but don't delete the @@ -859,7 +855,7 @@ void LICM::FindPromotableValuesInLoop( for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); - DEBUG(errs() << "LICM: Promoting value: " << *V << "\n"); + DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n"); } } diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 1d9dd68..16d3f2f 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -708,7 +708,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, } while (!WorkList.empty()) { - BasicBlock *BB = WorkList.back(); WorkList.pop_back(); + BasicBlock *BB = WorkList.pop_back_val(); LPM->deleteSimpleAnalysisValue(BB, LP); for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); BBI != BBE; ) { @@ -726,7 +726,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, // Update Frontier BBs' dominator info. 
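Several hunks in this commit (removeBlocks and the frontier loop above, SCCVN and SROA further down) replace a back()/pop_back() pair with SmallVector's pop_back_val(), which returns the element it removes. A stand-in for plain std::vector to show the idiom; the free function here is hypothetical, not part of the LLVM API:

#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

// Return-and-remove the last element, like llvm::SmallVector::pop_back_val().
template <typename T>
T pop_back_val(std::vector<T> &V) {
  assert(!V.empty() && "popping an empty worklist");
  T Val = std::move(V.back()); // move out before the slot is destroyed
  V.pop_back();
  return Val;
}

int main() {
  std::vector<int> Worklist = {1, 2, 3};
  while (!Worklist.empty())
    std::printf("%d\n", pop_back_val(Worklist)); // prints 3, 2, 1
}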
while (!FrontierBBs.empty()) { - BasicBlock *FBB = FrontierBBs.back(); FrontierBBs.pop_back(); + BasicBlock *FBB = FrontierBBs.pop_back_val(); BasicBlock *NewDominator = FBB->getSinglePredecessor(); if (!NewDominator) { pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 85f7368..fa820ed 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2723,7 +2723,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. - DeleteDeadPHIs(L->getHeader()); + Changed |= DeleteDeadPHIs(L->getHeader()); return Changed; } diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index c2bf9f2..ee8cb4f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -89,7 +89,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis<LoopInfo>(); BasicBlock *Header = L->getHeader(); - DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName() + DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() << "] Loop %" << Header->getName() << "\n"); (void)Header; @@ -111,13 +111,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (UnrollThreshold != NoThreshold) { unsigned LoopSize = ApproximateLoopSize(L); - DEBUG(errs() << " Loop Size = " << LoopSize << "\n"); + DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); uint64_t Size = (uint64_t)LoopSize*Count; if (TripCount != 1 && Size > UnrollThreshold) { - DEBUG(errs() << " Too large to fully unroll with count: " << Count + DEBUG(dbgs() << " Too large to fully unroll with count: " << Count << " because size: " << Size << ">" << UnrollThreshold << "\n"); if (!UnrollAllowPartial) { - DEBUG(errs() << " will not try to unroll partially because " + DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } @@ -127,10 +127,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { Count--; } if (Count < 2) { - DEBUG(errs() << " could not unroll partially\n"); + DEBUG(dbgs() << " could not unroll partially\n"); return false; } - DEBUG(errs() << " partially unrolling with count: " << Count << "\n"); + DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); } } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 0c19133..527a7b5 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -436,7 +436,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ if (Metrics.NumInsts > Threshold || Metrics.NumBlocks * 5 > Threshold || Metrics.NeverInline) { - DEBUG(errs() << "NOT unswitching loop %" + DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << ", cost too high: " << currentLoop->getBlocks().size() << "\n"); return false; @@ -522,7 +522,7 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock) { - DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %" + DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" << loopHeader->getName() << " [" << 
L->getBlocks().size() << " blocks] in Function " << L->getHeader()->getParent()->getName() << " on cond: " << *Val << " == " << *Cond << "\n"); @@ -581,7 +581,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L, void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L) { Function *F = loopHeader->getParent(); - DEBUG(errs() << "loop-unswitch: Unswitching loop %" + DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << loopHeader->getName() << " [" << L->getBlocks().size() << " blocks] in Function " << F->getName() << " when '" << *Val << "' == " << *LIC << "\n"); @@ -707,7 +707,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector<Instruction*> &Worklist, Loop *L, LPPassManager *LPM) { - DEBUG(errs() << "Replace with '" << *V << "': " << *I); + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -769,7 +769,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, return; } - DEBUG(errs() << "Nuking dead block: " << *BB); + DEBUG(dbgs() << "Nuking dead block: " << *BB); // Remove the instructions in the basic block from the worklist. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { @@ -867,7 +867,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC // in the loop with the appropriate one directly. if (IsEqual || (isa<ConstantInt>(Val) && - Val->getType() == Type::getInt1Ty(Val->getContext()))) { + Val->getType()->isInteger(1))) { Value *Replacement; if (IsEqual) Replacement = Val; @@ -968,7 +968,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { - DEBUG(errs() << "Remove dead instruction '" << *I); + DEBUG(dbgs() << "Remove dead instruction '" << *I); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -993,10 +993,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::And: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) + I->getOperand(0)->getType()->isInteger(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType() == Type::getInt1Ty(I->getContext())) { + if (CB->getType()->isInteger(1)) { if (CB->isOne()) // X & 1 -> X ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); else // X & 0 -> 0 @@ -1007,10 +1007,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::Or: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) + I->getOperand(0)->getType()->isInteger(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType() == Type::getInt1Ty(I->getContext())) { + if (CB->getType()->isInteger(1)) { if (CB->isOne()) // X | 1 -> 1 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); else // X | 0 -> X @@ -1029,7 +1029,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { if (!SinglePred) continue; // Nothing to do. 
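The And/Or cases above lean on the usual i1 identities: X & 1 = X, X & 0 = 0, X | 1 = 1, X | 0 = X. An exhaustive check over both values of X, just to make the folds concrete:

#include <cassert>

int main() {
  for (int X = 0; X <= 1; ++X) {
    assert((X & 1) == X); // X & 1 -> X
    assert((X & 0) == 0); // X & 0 -> 0
    assert((X | 1) == 1); // X | 1 -> 1
    assert((X | 0) == X); // X | 0 -> X
  }
}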
assert(SinglePred == Pred && "CFG broken"); - DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- " + DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " << Succ->getName() << "\n"); // Resolve any single entry PHI nodes in Succ. @@ -1057,7 +1057,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // remove dead blocks. break; // FIXME: Enable. - DEBUG(errs() << "Folded branch: " << *BI); + DEBUG(dbgs() << "Folded branch: " << *BI); BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); DeadSucc->removePredecessor(BI->getParent(), true); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index c922814..e0aa491 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) { LLVMContext &Context = V->getContext(); // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType() == Type::getInt8Ty(Context)) return V; + if (V->getType()->isInteger(8)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. @@ -456,10 +456,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment) }; Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt); - DEBUG(errs() << "Replace stores:\n"; + DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) - errs() << *Range.TheStores[i]; - errs() << "With: " << *C); C=C; + dbgs() << *Range.TheStores[i]; + dbgs() << "With: " << *C); C=C; // Don't invalidate the iterator BBI = BI; @@ -562,8 +562,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), srcAlloca->use_end()); while (!srcUseList.empty()) { - User *UI = srcUseList.back(); - srcUseList.pop_back(); + User *UI = srcUseList.pop_back_val(); if (isa<BitCastInst>(UI)) { for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); @@ -725,7 +724,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis::NoAlias) return false; - DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); + DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); // If not, then we know we can transform this. 
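The processMemMove change above rewrites memmove to memcpy once alias analysis proves the operands cannot overlap; for non-overlapping buffers the two calls are interchangeable by contract. A host-side illustration of that rule (plain C++, not the transform itself):

#include <cstdio>
#include <cstring>

int main() {
  char Src[8] = "abcdefg";
  char Dst[8];
  // Distinct local arrays cannot overlap, so this memmove is equivalent to
  // memcpy(Dst, Src, sizeof Src), which is exactly the rewrite MemCpyOpt
  // performs when alias analysis returns NoAlias.
  std::memmove(Dst, Src, sizeof Src);
  std::puts(Dst); // "abcdefg"
}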
Module *Mod = M->getParent()->getParent()->getParent(); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 827b47d..4a99f4a 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -60,12 +60,12 @@ namespace { /// static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { Module *M = I->getParent()->getParent()->getParent(); - errs() << Instruction::getOpcodeName(I->getOpcode()) << " " + dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " << *Ops[0].Op->getType() << '\t'; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - errs() << "[ "; - WriteAsOperand(errs(), Ops[i].Op, false, M); - errs() << ", #" << Ops[i].Rank << "] "; + dbgs() << "[ "; + WriteAsOperand(dbgs(), Ops[i].Op, false, M); + dbgs() << ", #" << Ops[i].Rank << "] "; } } #endif @@ -186,7 +186,7 @@ unsigned Reassociate::getRank(Value *V) { (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) ++Rank; - //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = " + //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " // << Rank << "\n"); return ValueRankMap[I] = Rank; @@ -226,7 +226,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { isReassociableOp(RHS, I->getOpcode()) && "Not an expression that needs linearization?"); - DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); + DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); // Move the RHS instruction to live immediately before I, avoiding breaking // dominator properties. @@ -239,7 +239,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { ++NumLinear; MadeChange = true; - DEBUG(errs() << "Linearized: " << *I << '\n'); + DEBUG(dbgs() << "Linearized: " << *I << '\n'); // If D is part of this expression tree, tail recurse. if (isReassociableOp(I->getOperand(1), I->getOpcode())) @@ -335,10 +335,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, if (I->getOperand(0) != Ops[i].Op || I->getOperand(1) != Ops[i+1].Op) { Value *OldLHS = I->getOperand(0); - DEBUG(errs() << "RA: " << *I << '\n'); + DEBUG(dbgs() << "RA: " << *I << '\n'); I->setOperand(0, Ops[i].Op); I->setOperand(1, Ops[i+1].Op); - DEBUG(errs() << "TO: " << *I << '\n'); + DEBUG(dbgs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; @@ -351,9 +351,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, assert(i+2 < Ops.size() && "Ops index out of range!"); if (I->getOperand(1) != Ops[i].Op) { - DEBUG(errs() << "RA: " << *I << '\n'); + DEBUG(dbgs() << "RA: " << *I << '\n'); I->setOperand(1, Ops[i].Op); - DEBUG(errs() << "TO: " << *I << '\n'); + DEBUG(dbgs() << "TO: " << *I << '\n'); MadeChange = true; ++NumChanged; } @@ -414,6 +414,10 @@ static Value *NegateValue(Value *V, Instruction *BI) { // non-instruction value) or right after the definition. These negates will // be zapped by reassociate later, so we don't need much finesse here. BinaryOperator *TheNeg = cast<BinaryOperator>(*UI); + + // Verify that the negate is in this function, V might be a constant expr. 
+ if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) + continue; BasicBlock::iterator InsertPt; if (Instruction *InstInput = dyn_cast<Instruction>(V)) { @@ -480,7 +484,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, Sub->replaceAllUsesWith(New); Sub->eraseFromParent(); - DEBUG(errs() << "Negated: " << *New << '\n'); + DEBUG(dbgs() << "Negated: " << *New << '\n'); return New; } @@ -788,6 +792,11 @@ Value *Reassociate::OptimizeAdd(Instruction *I, Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); SmallVector<Value*, 4> NewMulOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + // Only try to remove factors from expressions we're allowed to. + BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); + if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) + continue; + if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { NewMulOps.push_back(V); Ops.erase(Ops.begin()+i); @@ -797,14 +806,15 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // No need for extra uses anymore. delete DummyInst; - + unsigned NumAddedValues = NewMulOps.size(); Value *V = EmitAddTreeOfValues(I, NewMulOps); - + // Now that we have inserted the add tree, optimize it. This allows us to // handle cases that require multiple factoring steps, such as this: // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)) assert(NumAddedValues > 1 && "Each occurrence should contribute a value"); + (void)NumAddedValues; V = ReassociateExpression(cast<BinaryOperator>(V)); // Create the multiply. @@ -928,6 +938,10 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { if (BI->getOpcode() == Instruction::Sub) { if (ShouldBreakUpSubtract(BI)) { BI = BreakUpSubtract(BI, ValueRankMap); + // Reset the BBI iterator in case BreakUpSubtract changed the + // instruction it points to. + BBI = BI; + ++BBI; MadeChange = true; } else if (BinaryOperator::isNeg(BI)) { // Otherwise, this is a negation. See if the operand is a multiply tree @@ -967,7 +981,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { SmallVector<ValueEntry, 8> Ops; LinearizeExprTree(I, Ops); - DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << '\n'); + DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); // Now that we have linearized the tree to a list and have gathered all of // the operands and their ranks, sort the operands by their rank. Use a @@ -982,7 +996,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { if (Value *V = OptimizeExpression(I, Ops)) { // This expression tree simplified to something that isn't a tree, // eliminate it. - DEBUG(errs() << "Reassoc to scalar: " << *V << '\n'); + DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); RemoveDeadBinaryOp(I); ++NumAnnihil; @@ -1001,7 +1015,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { Ops.insert(Ops.begin(), Tmp); } - DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << '\n'); + DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); if (Ops.size() == 1) { // This expression tree simplified to something that isn't a tree, diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index d8c59b1..02b45a1 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -218,7 +218,7 @@ public: /// This returns true if the block was not considered live before. 
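The Reassociate::OptimizeAdd hunk above relies on the multi-step factoring A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)). A quick numeric spot-check of that identity over a small integer range, where reassociation is exact:

#include <cassert>

int main() {
  for (int A = -4; A <= 4; ++A)
    for (int B = -4; B <= 4; ++B)
      for (int C = -4; C <= 4; ++C)
        assert(A * A * B + A * A * C == A * (A * (B + C)));
}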
bool MarkBlockExecutable(BasicBlock *BB) { if (!BBExecutable.insert(BB)) return false; - DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n"); + DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); BBWorkList.push_back(BB); // Add the block to the work list! return true; } @@ -316,7 +316,7 @@ private: // void markConstant(LatticeVal &IV, Value *V, Constant *C) { if (!IV.markConstant(C)) return; - DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n'); + DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n'); InstWorkList.push_back(V); } @@ -328,7 +328,7 @@ private: void markForcedConstant(Value *V, Constant *C) { assert(!isa<StructType>(V->getType()) && "Should use other method"); ValueState[V].markForcedConstant(C); - DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n'); + DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n'); InstWorkList.push_back(V); } @@ -339,11 +339,11 @@ private: void markOverdefined(LatticeVal &IV, Value *V) { if (!IV.markOverdefined()) return; - DEBUG(errs() << "markOverdefined: "; + DEBUG(dbgs() << "markOverdefined: "; if (Function *F = dyn_cast<Function>(V)) - errs() << "Function '" << F->getName() << "'\n"; + dbgs() << "Function '" << F->getName() << "'\n"; else - errs() << *V << '\n'); + dbgs() << *V << '\n'); // Only instructions go on the work list OverdefinedInstWorkList.push_back(V); } @@ -431,7 +431,7 @@ private: // If the destination is already executable, we just made an *edge* // feasible that wasn't before. Revisit the PHI nodes in the block // because they have potentially new operands. - DEBUG(errs() << "Marking Edge Executable: " << Source->getName() + DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() << " -> " << Dest->getName() << "\n"); PHINode *PN; @@ -516,7 +516,7 @@ private: void visitInstruction(Instruction &I) { // If a new instruction is added to LLVM that we don't handle. - errs() << "SCCP: Don't know how to handle: " << I; + dbgs() << "SCCP: Don't know how to handle: " << I; markAnythingOverdefined(&I); // Just in case } }; @@ -580,7 +580,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, } #ifndef NDEBUG - errs() << "Unknown terminator instruction: " << TI << '\n'; + dbgs() << "Unknown terminator instruction: " << TI << '\n'; #endif llvm_unreachable("SCCP: Don't know how to handle this terminator!"); } @@ -640,7 +640,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { return true; #ifndef NDEBUG - errs() << "Unknown terminator instruction: " << *TI << '\n'; + dbgs() << "Unknown terminator instruction: " << *TI << '\n'; #endif llvm_unreachable(0); } @@ -1324,7 +1324,7 @@ void SCCPSolver::Solve() { while (!OverdefinedInstWorkList.empty()) { Value *I = OverdefinedInstWorkList.pop_back_val(); - DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n'); + DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from // bottom to constant @@ -1343,7 +1343,7 @@ void SCCPSolver::Solve() { while (!InstWorkList.empty()) { Value *I = InstWorkList.pop_back_val(); - DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n'); + DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n'); // "I" got into the work list because it made the transition from undef to // constant. 
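The markConstant/markForcedConstant/markOverdefined helpers above walk SCCP's three-level lattice: undefined below constant below overdefined, with merges only ever moving upward. A compact stand-in for the merge rule (hypothetical type, not LLVM's LatticeVal class); the boolean result is the solver's cue to requeue an instruction's users:

#include <cstdio>

struct LatticeVal {
  enum Kind { Undefined, Constant, Overdefined } K = Undefined;
  int C = 0; // payload, meaningful only when K == Constant

  // Merge another value in; returns true if this value changed.
  bool mergeIn(const LatticeVal &Other) {
    if (Other.K == Undefined || K == Overdefined)
      return false;                 // nothing new, or already at top
    if (K == Undefined) {
      *this = Other;                // first real information wins
      return true;
    }
    if (K == Constant && Other.K == Constant && C == Other.C)
      return false;                 // agreeing constants: no change
    K = Overdefined;                // conflict: go to top
    return true;
  }
};

int main() {
  LatticeVal V;
  V.mergeIn(LatticeVal{LatticeVal::Constant, 4});
  std::printf("constant %d\n", V.C);                                 // 4
  V.mergeIn(LatticeVal{LatticeVal::Constant, 7});
  std::printf("overdefined: %d\n", V.K == LatticeVal::Overdefined);  // 1
}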
@@ -1364,7 +1364,7 @@ void SCCPSolver::Solve() { BasicBlock *BB = BBWorkList.back(); BBWorkList.pop_back(); - DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n'); + DEBUG(dbgs() << "\nPopped off BBWL: " << *BB << '\n'); // Notify all instructions in this basic block that they are newly // executable. @@ -1597,7 +1597,7 @@ FunctionPass *llvm::createSCCPPass() { } static void DeleteInstructionInBlock(BasicBlock *BB) { - DEBUG(errs() << " BasicBlock Dead:" << *BB); + DEBUG(dbgs() << " BasicBlock Dead:" << *BB); ++NumDeadBlocks; // Delete the instructions backwards, as it has a reduced likelihood of @@ -1616,7 +1616,7 @@ static void DeleteInstructionInBlock(BasicBlock *BB) { // and return true if the function was modified. // bool SCCP::runOnFunction(Function &F) { - DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n"); + DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); // Mark the first block of the function as being executable. @@ -1630,7 +1630,7 @@ bool SCCP::runOnFunction(Function &F) { bool ResolvedUndefs = true; while (ResolvedUndefs) { Solver.Solve(); - DEBUG(errs() << "RESOLVING UNDEFs\n"); + DEBUG(dbgs() << "RESOLVING UNDEFs\n"); ResolvedUndefs = Solver.ResolvedUndefsIn(F); } @@ -1665,7 +1665,7 @@ bool SCCP::runOnFunction(Function &F) { Constant *Const = IV.isConstant() ? IV.getConstant() : UndefValue::get(Inst->getType()); - DEBUG(errs() << " Constant: " << *Const << " = " << *Inst); + DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst); // Replaces all of the uses of a variable with uses of the constant. Inst->replaceAllUsesWith(Const); @@ -1775,7 +1775,7 @@ bool IPSCCP::runOnModule(Module &M) { while (ResolvedUndefs) { Solver.Solve(); - DEBUG(errs() << "RESOLVING UNDEFS\n"); + DEBUG(dbgs() << "RESOLVING UNDEFS\n"); ResolvedUndefs = false; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) ResolvedUndefs |= Solver.ResolvedUndefsIn(*F); @@ -1802,7 +1802,7 @@ bool IPSCCP::runOnModule(Module &M) { Constant *CST = IV.isConstant() ? IV.getConstant() : UndefValue::get(AI->getType()); - DEBUG(errs() << "*** Arg " << *AI << " = " << *CST <<"\n"); + DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n"); // Replaces all of the uses of a variable with uses of the // constant. @@ -1847,7 +1847,7 @@ bool IPSCCP::runOnModule(Module &M) { Constant *Const = IV.isConstant() ? IV.getConstant() : UndefValue::get(Inst->getType()); - DEBUG(errs() << " Constant: " << *Const << " = " << *Inst); + DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst); // Replaces all of the uses of a variable with uses of the // constant. 
@@ -1944,7 +1944,7 @@ bool IPSCCP::runOnModule(Module &M) { GlobalVariable *GV = I->first; assert(!I->second.isOverdefined() && "Overdefined values should have been taken out of the map!"); - DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n"); + DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n"); while (!GV->use_empty()) { StoreInst *SI = cast<StoreInst>(GV->use_back()); SI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp index f91fbda..9685a29 100644 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ b/lib/Transforms/Scalar/SCCVN.cpp @@ -678,8 +678,7 @@ bool SCCVN::runOnFunction(Function& F) { stack.push_back(*PI); while (!stack.empty()) { - BasicBlock* CurrBB = stack.back(); - stack.pop_back(); + BasicBlock* CurrBB = stack.pop_back_val(); visited.insert(CurrBB); ValueNumberScope* S = BBMap[CurrBB]; diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 79bb7c5..9e1e79a 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -252,8 +252,8 @@ bool SROA::performScalarRepl(Function &F) { // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { - DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(errs() << " memcpy = " << *TheCopy << '\n'); + DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. @@ -314,14 +314,14 @@ bool SROA::performScalarRepl(Function &F) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) { - DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " + DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); // Create and insert the vector alloca. NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } else { - DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); @@ -345,7 +345,7 @@ bool SROA::performScalarRepl(Function &F) { /// predicate, do SROA now. void SROA::DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList) { - DEBUG(errs() << "Found inst to SROA: " << *AI << '\n'); + DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector<AllocaInst*, 32> ElementAllocas; if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); @@ -919,7 +919,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, IntegerType::get(SI->getContext(), AllocaSizeBits), "", SI); - DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI + DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI << '\n'); // There are two forms here: AI could be an array or struct. 
Both cases @@ -1029,7 +1029,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI + DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); // There are two forms here: AI could be an array or struct. Both cases @@ -1153,7 +1153,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { isSafeForScalarRepl(AI, AI, 0, Info); if (Info.isUnsafe) { - DEBUG(errs() << "Cannot transform: " << *AI << '\n'); + DEBUG(dbgs() << "Cannot transform: " << *AI << '\n'); return 0; } @@ -1181,7 +1181,7 @@ void SROA::CleanupAllocaUsers(Value *V) { if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { // Safe to remove debug info uses. while (!DbgInUses.empty()) { - DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back(); + DbgInfoIntrinsic *DI = DbgInUses.pop_back_val(); DI->eraseFromParent(); } I->eraseFromParent(); diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index a36da78..43447de 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -99,9 +99,8 @@ static bool MarkAliveBlocks(BasicBlock *BB, SmallVector<BasicBlock*, 128> Worklist; Worklist.push_back(BB); bool Changed = false; - while (!Worklist.empty()) { - BB = Worklist.back(); - Worklist.pop_back(); + do { + BB = Worklist.pop_back_val(); if (!Reachable.insert(BB)) continue; @@ -150,7 +149,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, Changed |= ConstantFoldTerminator(BB); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) Worklist.push_back(*SI); - } + } while (!Worklist.empty()); return Changed; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 3c28ad2..9183f3a 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -80,7 +80,7 @@ public: /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); - + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -101,10 +101,11 @@ public: /// EmitMemSet - Emit a call to the memset function Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B); - /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. - /// 'floor'). This function is known to take a single of type matching 'Op' - /// and returns one value with the same type. If 'Op' is a long double, 'l' - /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. + /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' + /// (e.g. 'floor'). This function is known to take a single argument of type + /// matching 'Op' and returns one value with the same type. If 'Op' is a long + /// double, 'l' is added as the suffix of name; if 'Op' is a float, we add an + /// 'f' suffix.
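The suffix rule in the comment above (bare name for double, 'f' for float, 'l' for long double) is easy to state as a tiny helper. The FPKind tag and unaryFloatFnName function below are hypothetical stand-ins; the real EmitUnaryFloatFnCall works on LLVM types:

#include <cstdio>
#include <string>

enum class FPKind { Float, Double, LongDouble };

// The libm naming convention the comment describes.
std::string unaryFloatFnName(const char *Base, FPKind K) {
  std::string Name = Base;
  if (K == FPKind::Float)
    Name += 'f';       // e.g. floor -> floorf
  else if (K == FPKind::LongDouble)
    Name += 'l';       // e.g. floor -> floorl
  return Name;         // double keeps the bare name
}

int main() {
  std::printf("%s %s %s\n",
              unaryFloatFnName("floor", FPKind::Float).c_str(),
              unaryFloatFnName("floor", FPKind::Double).c_str(),
              unaryFloatFnName("floor", FPKind::LongDouble).c_str());
}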
Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, const AttrListPtr &Attrs); @@ -163,7 +164,7 @@ Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { Module *M = Caller->getParent(); AttributeWithIndex AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - + const Type *I8Ptr = Type::getInt8PtrTy(*Context); const Type *I32Ty = Type::getInt32Ty(*Context); Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), @@ -236,8 +237,8 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -504,8 +505,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (!Array || - Array->getType()->getElementType() != Type::getInt8Ty(V->getContext())) + if (!Array || !Array->getType()->getElementType()->isInteger(8)) return false; // Get the number of elements in the array @@ -677,8 +677,7 @@ struct StrChrOpt : public LibCallOptimization { if (!TD) return 0; uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || - FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32. + if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32. return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. @@ -720,7 +719,7 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - FT->getReturnType() != Type::getInt32Ty(*Context) || + !FT->getReturnType()->isInteger(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; @@ -768,7 +767,7 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - FT->getReturnType() != Type::getInt32Ty(*Context) || + !FT->getReturnType()->isInteger(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa<IntegerType>(FT->getParamType(2))) @@ -949,20 +948,20 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, x) -> x. if (CI->getOperand(1) == CI->getOperand(2)) return B.CreateBitCast(CI->getOperand(1), CI->getType()); - + // See if either input string is a constant string. std::string SearchStr, ToFindStr; bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); - + // fold strstr(x, "") -> x. if (HasStr2 && ToFindStr.empty()) return B.CreateBitCast(CI->getOperand(1), CI->getType()); - + // If both strings are known, constant fold it. 
if (HasStr1 && HasStr2) { std::string::size_type Offset = SearchStr.find(ToFindStr); - + if (Offset == std::string::npos) // strstr("foo", "bar") -> null return Constant::getNullValue(CI->getType()); @@ -971,7 +970,7 @@ struct StrStrOpt : public LibCallOptimization { Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } - + // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), @@ -979,7 +978,7 @@ struct StrStrOpt : public LibCallOptimization { return 0; } }; - + //===---------------------------------------===// // 'memcmp' Optimizations @@ -989,7 +988,7 @@ struct MemCmpOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - FT->getReturnType() != Type::getInt32Ty(*Context)) + !FT->getReturnType()->isInteger(32)) return 0; Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); @@ -1096,27 +1095,6 @@ struct MemSetOpt : public LibCallOptimization { //===----------------------------------------------------------------------===// //===---------------------------------------===// -// 'object size' -namespace { -struct SizeOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // TODO: We can do more with this, but delaying to here should be no change - // in behavior. - ConstantInt *Const = dyn_cast<ConstantInt>(CI->getOperand(2)); - - if (!Const) return 0; - - const Type *Ty = Callee->getFunctionType()->getReturnType(); - - if (Const->getZExtValue() == 0) - return Constant::getAllOnesValue(Ty); - else - return ConstantInt::get(Ty, 0); - } -}; -} - -//===---------------------------------------===// // 'memcpy_chk' Optimizations struct MemCpyChkOpt : public LibCallOptimization { @@ -1351,7 +1329,7 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 2 arguments of the same FP type, which match the // result type. 
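The strstr constant fold above maps std::string::find's result straight onto the IR: npos becomes a null pointer, and any other offset becomes a GEP off the haystack pointer. The same mapping as a host-side helper (foldStrStr is a hypothetical name, not the pass code):

#include <cstdio>
#include <string>

const char *foldStrStr(const std::string &Search, const std::string &ToFind,
                       const char *SearchPtr) {
  std::string::size_type Offset = Search.find(ToFind);
  if (Offset == std::string::npos)
    return nullptr;            // strstr("foo", "bar") -> null
  return SearchPtr + Offset;   // strstr("abcd", "cd") -> &haystack[2]
}

int main() {
  const char *Hay = "abcd";
  std::printf("%s\n", foldStrStr(Hay, "cd", Hay)); // prints "cd"
}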
if (FT->getNumParams() != 1 || - FT->getReturnType() != Type::getInt32Ty(*Context) || + !FT->getReturnType()->isInteger(32) || !isa<IntegerType>(FT->getParamType(0))) return 0; @@ -1387,7 +1365,7 @@ struct IsDigitOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - FT->getParamType(0) != Type::getInt32Ty(*Context)) + !FT->getParamType(0)->isInteger(32)) return 0; // isdigit(c) -> (c-'0') <u 10 @@ -1408,7 +1386,7 @@ struct IsAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - FT->getParamType(0) != Type::getInt32Ty(*Context)) + !FT->getParamType(0)->isInteger(32)) return 0; // isascii(c) -> c <u 128 @@ -1449,7 +1427,7 @@ struct ToAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != Type::getInt32Ty(*Context)) + !FT->getParamType(0)->isInteger(32)) return 0; // isascii(c) -> c & 0x7f @@ -1558,7 +1536,8 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. - ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); + ConstantInt::get + (TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1688,8 +1667,9 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), + EmitFWrite(CI->getOperand(2), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), CI->getOperand(1), B); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1744,7 +1724,6 @@ namespace { FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; // Object Size Checking - SizeOpt ObjectSize; MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk; bool Modified; // This is only used by doInitialization. @@ -1854,8 +1833,6 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["fprintf"] = &FPrintF; // Object Size Checking - Optimizations["llvm.objectsize.i32"] = &ObjectSize; - Optimizations["llvm.objectsize.i64"] = &ObjectSize; Optimizations["__memcpy_chk"] = &MemCpyChk; Optimizations["__memset_chk"] = &MemSetChk; Optimizations["__memmove_chk"] = &MemMoveChk; @@ -1896,8 +1873,8 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { Value *Result = LCO->OptimizeCall(CI, TD, Builder); if (Result == 0) continue; - DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI; - errs() << " into: " << *Result << "\n"); + DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI; + dbgs() << " into: " << *Result << "\n"); // Something changed! 
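The isdigit/isascii/toascii rewrites above are branch-free bit tricks: subtracting '0' wraps below-'0' inputs around to huge unsigned values, so a single unsigned comparison covers both bounds. An exhaustive check over all byte values, assuming a POSIX host for isascii/toascii and the "C" locale:

#include <cassert>
#include <cctype>

int main() {
  for (unsigned c = 0; c < 256; ++c) {
    assert(((c - '0') < 10u) == (isdigit(c) != 0)); // isdigit(c) -> (c-'0') <u 10
    assert((c < 128u) == (isascii(c) != 0));        // isascii(c) -> c <u 128
    assert((c & 0x7f) == (unsigned)toascii(c));     // toascii(c) -> c & 0x7f
  }
}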
Changed = true; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index b06ae3d..2306a77 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { BasicBlock *DestBlock = Branch->getSuccessor(0); assert(SourceBlock != DestBlock && "Our predicate is broken!"); - DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName() + DEBUG(dbgs() << "TailDuplication[" << SourceBlock->getParent()->getName() << "]: Eliminating branch: " << *Branch); // See if we can avoid duplicating code by moving it up to a dominator of both // blocks. if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { - DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n"); + DEBUG(dbgs() << "Found shared dominator: " << DomBlock->getName() << "\n"); // If there are non-phi instructions in DestBlock that have no operands // defined in DestBlock, and if the instruction has no side effects, we can @@ -272,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { // Remove from DestBlock, move right before the term in DomBlock. DestBlock->getInstList().remove(I); DomBlock->getInstList().insert(DomBlock->getTerminator(), I); - DEBUG(errs() << "Hoisted: " << *I); + DEBUG(dbgs() << "Hoisted: " << *I); } } } diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 135a621..8c4aa59 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -17,6 +17,7 @@ #include "llvm/Instruction.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" @@ -54,8 +55,8 @@ void ExtAddrMode::print(raw_ostream &OS) const { } void ExtAddrMode::dump() const { - print(errs()); - errs() << '\n'; + print(dbgs()); + dbgs() << '\n'; } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2962e84..e902688 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -78,7 +78,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) { /// is dead. Also recursively delete any operands that become dead as /// a result. This includes tracing the def-use list from the PHI to see if /// it is ultimately unused or if it reaches an unused cycle. -void llvm::DeleteDeadPHIs(BasicBlock *BB) { +bool llvm::DeleteDeadPHIs(BasicBlock *BB) { // Recursively deleting a PHI may cause multiple PHIs to be deleted // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. SmallVector<WeakVH, 8> PHIs; @@ -86,9 +86,12 @@ void llvm::DeleteDeadPHIs(BasicBlock *BB) { PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); + bool Changed = false; for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) - RecursivelyDeleteDeadPHINode(PN); + Changed |= RecursivelyDeleteDeadPHINode(PN); + + return Changed; } /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, @@ -252,7 +255,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { Value *RetVal = 0; // Create a value to return... if the function doesn't return null... 
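DeleteDeadPHIs above now reports whether it did anything, and callers such as the LoopStrengthReduce hunk earlier accumulate the result with Changed |=. The |= form matters because, unlike a short-circuiting ||, it always runs the right-hand transform; a small demonstration:

#include <cstdio>

bool transformA() { std::puts("A ran"); return true;  }
bool transformB() { std::puts("B ran"); return false; }

int main() {
  bool Changed = false;
  Changed |= transformA();
  Changed |= transformB(); // still executes; |= never short-circuits
  // By contrast, `Changed = Changed || transformB();` would skip B entirely
  // once Changed is true, a classic way to silently lose transformations.
  std::printf("changed: %d\n", Changed);
}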
- if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext())) + if (!BB->getParent()->getReturnType()->isVoidTy()) RetVal = Constant::getNullValue(BB->getParent()->getReturnType()); // Create the return... @@ -673,16 +676,3 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, return 0; } -/// CopyPrecedingStopPoint - If I is immediately preceded by a StopPoint, -/// make a copy of the stoppoint before InsertPos (presumably before copying -/// or moving I). -void llvm::CopyPrecedingStopPoint(Instruction *I, - BasicBlock::iterator InsertPos) { - if (I != I->getParent()->begin()) { - BasicBlock::iterator BBI = I; --BBI; - if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BBI)) { - CallInst *newDSPI = cast<CallInst>(DSPI->clone()); - newDSPI->insertBefore(InsertPos); - } - } -} diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index b5ffe06..c580b8f 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() { } } - DEBUG(errs() << ": " << CallSites.size() << " call sites.\n"); + DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); // Inline call sites. bool Changed = false; @@ -109,21 +109,21 @@ void BasicInlinerImpl::inlineFunctions() { } InlineCost IC = CA.getInlineCost(CS, NeverInline); if (IC.isAlways()) { - DEBUG(errs() << " Inlining: cost=always" + DEBUG(dbgs() << " Inlining: cost=always" <<", call: " << *CS.getInstruction()); } else if (IC.isNever()) { - DEBUG(errs() << " NOT Inlining: cost=never" + DEBUG(dbgs() << " NOT Inlining: cost=never" <<", call: " << *CS.getInstruction()); continue; } else { int Cost = IC.getValue(); if (Cost >= (int) BasicInlineThreshold) { - DEBUG(errs() << " NOT Inlining: cost = " << Cost + DEBUG(dbgs() << " NOT Inlining: cost = " << Cost << ", call: " << *CS.getInstruction()); continue; } else { - DEBUG(errs() << " Inlining: cost = " << Cost + DEBUG(dbgs() << " Inlining: cost = " << Cost << ", call: " << *CS.getInstruction()); } } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index c287747..bd750cc 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -184,7 +184,6 @@ namespace { const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; - Value *DbgFnStart; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, DenseMap<const Value*, Value*> &valueMap, @@ -193,7 +192,7 @@ namespace { ClonedCodeInfo *codeInfo, const TargetData *td) : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns), - NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td), DbgFnStart(NULL) { + NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } /// CloneBlock - The specified block is found to be reachable, clone it and @@ -235,19 +234,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, continue; } - // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner. 
- if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) { - if (DbgFnStart == NULL) { - DISubprogram SP(DFSI->getSubprogram()); - if (SP.describes(BB->getParent())) - DbgFnStart = DFSI->getSubprogram(); - } - } - if (const DbgRegionEndInst *DREIS = dyn_cast<DbgRegionEndInst>(II)) { - if (DREIS->getContext() == DbgFnStart) - continue; - } - Instruction *NewInst = II->clone(); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp index 7e000a1..38928dc 100644 --- a/lib/Transforms/Utils/CloneLoop.cpp +++ b/lib/Transforms/Utils/CloneLoop.cpp @@ -91,7 +91,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, Loop *NewParentLoop = NULL; - while (!LoopNest.empty()) { + do { Loop *L = LoopNest.pop_back_val(); Loop *NewLoop = new Loop(); @@ -123,7 +123,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, // Process sub loops for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) LoopNest.push_back(*I); - } + } while (!LoopNest.empty()); // Remap instructions to reference operands from ValueMap. for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(), diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f966681..b208494 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include <algorithm> #include <set> @@ -44,8 +45,8 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, namespace { class CodeExtractor { - typedef std::vector<Value*> Values; - std::set<BasicBlock*> BlocksToExtract; + typedef SetVector<Value*> Values; + SetVector<BasicBlock*> BlocksToExtract; DominatorTree* DT; bool AggregateArgs; unsigned NumExitBlocks; @@ -135,7 +136,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; - BlocksToExtract.erase(OldPred); + BlocksToExtract.remove(OldPred); BlocksToExtract.insert(NewBB); Header = NewBB; @@ -180,7 +181,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { } void CodeExtractor::splitReturnBlocks() { - for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(), + for (SetVector<BasicBlock*>::iterator I = BlocksToExtract.begin(), E = BlocksToExtract.end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); @@ -206,7 +207,7 @@ void CodeExtractor::splitReturnBlocks() { // void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { std::set<BasicBlock*> ExitBlocks; - for (std::set<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(), + for (SetVector<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(), ce = BlocksToExtract.end(); ci != ce; ++ci) { BasicBlock *BB = *ci; @@ -215,13 +216,13 @@ void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { // instruction is used outside the region, it's an output. for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O) if (definedInCaller(*O)) - inputs.push_back(*O); + inputs.insert(*O); // Consider uses of this instruction (outputs). 
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) if (!definedInRegion(*UI)) { - outputs.push_back(I); + outputs.insert(I); break; } } // for: insts @@ -234,12 +235,6 @@ void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { } // for: basic blocks NumExitBlocks = ExitBlocks.size(); - - // Eliminate duplicates. - std::sort(inputs.begin(), inputs.end()); - inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end()); - std::sort(outputs.begin(), outputs.end()); - outputs.erase(std::unique(outputs.begin(), outputs.end()), outputs.end()); } /// constructFunction - make a function based on inputs and outputs, as follows: @@ -252,8 +247,8 @@ Function *CodeExtractor::constructFunction(const Values &inputs, BasicBlock *newHeader, Function *oldFunction, Module *M) { - DEBUG(errs() << "inputs: " << inputs.size() << "\n"); - DEBUG(errs() << "outputs: " << outputs.size() << "\n"); + DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); + DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); // This function returns unsigned, outputs will go back by reference. switch (NumExitBlocks) { @@ -269,25 +264,25 @@ Function *CodeExtractor::constructFunction(const Values &inputs, for (Values::const_iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) { const Value *value = *i; - DEBUG(errs() << "value used in func: " << *value << "\n"); + DEBUG(dbgs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. for (Values::const_iterator I = outputs.begin(), E = outputs.end(); I != E; ++I) { - DEBUG(errs() << "instr used in func: " << **I << "\n"); + DEBUG(dbgs() << "instr used in func: " << **I << "\n"); if (AggregateArgs) paramTy.push_back((*I)->getType()); else paramTy.push_back(PointerType::getUnqual((*I)->getType())); } - DEBUG(errs() << "Function type: " << *RetTy << " f("); + DEBUG(dbgs() << "Function type: " << *RetTy << " f("); for (std::vector<const Type*>::iterator i = paramTy.begin(), e = paramTy.end(); i != e; ++i) - DEBUG(errs() << **i << ", "); - DEBUG(errs() << ")\n"); + DEBUG(dbgs() << **i << ", "); + DEBUG(dbgs() << ")\n"); if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { PointerType *StructPtr = @@ -482,7 +477,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, std::map<BasicBlock*, BasicBlock*> ExitBlockMap; unsigned switchVal = 0; - for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(), + for (SetVector<BasicBlock*>::const_iterator i = BlocksToExtract.begin(), e = BlocksToExtract.end(); i != e; ++i) { TerminatorInst *TI = (*i)->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) @@ -593,7 +588,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // this should be rewritten as a `ret' // Check if the function should return a value - if (OldFnRetTy == Type::getVoidTy(Context)) { + if (OldFnRetTy->isVoidTy()) { ReturnInst::Create(Context, 0, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have @@ -633,7 +628,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(), + for (SetVector<BasicBlock*>::const_iterator i = 
BlocksToExtract.begin(), e = BlocksToExtract.end(); i != e; ++i) { // Delete the basic block from the old function, and the list of blocks oldBlocks.remove(*i); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 043046c..17f8827 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -210,34 +210,6 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } -/// findFnRegionEndMarker - This is a utility routine that is used by -/// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds -/// to the llvm.dbg.func.start of the function F. Otherwise return NULL. -/// -static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) { - - MDNode *FnStart = NULL; - const DbgRegionEndInst *FnEnd = NULL; - for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI) - for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE; - ++BI) { - if (FnStart == NULL) { - if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) { - DISubprogram SP(FSI->getSubprogram()); - assert (SP.isNull() == false && "Invalid llvm.dbg.func.start"); - if (SP.describes(F)) - FnStart = SP.getNode(); - } - continue; - } - - if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI)) - if (REI->getContext() == FnStart) - FnEnd = REI; - } - return FnEnd; -} - // InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. @@ -364,23 +336,6 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, ValueMap[I] = ActualArg; } - // Adjust llvm.dbg.region.end. If the CalledFunc has region end - // marker then clone that marker after next stop point at the - // call site. The function body cloner does not clone original - // region end marker from the CalledFunc. This will ensure that - // inlined function's scope ends at the right place. - if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) { - for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end(); - BI != BE; ++BI) { - if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) { - if (DbgRegionEndInst *NewDREI = - dyn_cast<DbgRegionEndInst>(DREI->clone())) - NewDREI->insertAfter(DSPI); - break; - } - } - } - // We want the inliner to prune the code as it copies. 
We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 7f11acf..090af95 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -32,7 +32,7 @@ namespace { bool runOnFunction(Function &F) { for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) - if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext())) + if (!AI->hasName() && !AI->getType()->isVoidTy()) AI->setName("arg"); for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -40,7 +40,7 @@ namespace { BB->setName("bb"); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext())) + if (!I->hasName() && !I->getType()->isVoidTy()) I->setName("tmp"); } return true; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 2426e3e..90e929e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -268,16 +268,17 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a /// trivially dead instruction, delete it. If that makes any of its operands -/// trivially dead, delete them too, recursively. -void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +/// trivially dead, delete them too, recursively. Return true if any +/// instructions were deleted. +bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { Instruction *I = dyn_cast<Instruction>(V); if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) - return; + return false; SmallVector<Instruction*, 16> DeadInsts; DeadInsts.push_back(I); - while (!DeadInsts.empty()) { + do { I = DeadInsts.pop_back_val(); // Null out all of the instruction's operands to see if any operand becomes @@ -297,22 +298,25 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { } I->eraseFromParent(); - } + } while (!DeadInsts.empty()); + + return true; } /// RecursivelyDeleteDeadPHINode - If the specified value is an effectively /// dead PHI node, due to being a def-use chain of single-use nodes that /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them -/// too, recursively. -void +/// too, recursively. Return true if the PHI node is actually deleted. +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { // We can remove a PHI if it is on a cycle in the def-use graph // where each node in the cycle has degree one, i.e. only one use, // and is an instruction with no side effects. if (!PN->hasOneUse()) - return; + return false; + bool Changed = false; SmallPtrSet<PHINode *, 4> PHIs; PHIs.insert(PN); for (Instruction *J = cast<Instruction>(*PN->use_begin()); @@ -324,9 +328,35 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { if (!PHIs.insert(cast<PHINode>(JP))) { // Break the cycle and delete the PHI and its operands. 
JP->replaceAllUsesWith(UndefValue::get(JP->getType())); - RecursivelyDeleteTriviallyDeadInstructions(JP); + (void)RecursivelyDeleteTriviallyDeadInstructions(JP); + Changed = true; break; } + return Changed; +} + +/// SimplifyInstructionsInBlock - Scan the specified basic block and try to +/// simplify any instructions in it and recursively delete dead instructions. +/// +/// This returns true if it changed the code, note that it can delete +/// instructions in other blocks as well in this block. +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { + bool MadeChange = false; + for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { + Instruction *Inst = BI++; + + if (Value *V = SimplifyInstruction(Inst, TD)) { + WeakVH BIHandle(BI); + ReplaceAndSimplifyAllUses(Inst, V, TD); + MadeChange = true; + if (BIHandle == 0) + BI = BB->begin(); + continue; + } + + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + return MadeChange; } //===----------------------------------------------------------------------===// @@ -421,7 +451,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - DEBUG(errs() << "Looking to fold " << BB->getName() << " into " + DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe @@ -456,7 +486,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { PI != PE; PI++) { if (BBPN->getIncomingValueForBlock(*PI) != PN->getIncomingValueForBlock(*PI)) { - DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " << (*PI)->getName() << "\n"); @@ -471,7 +501,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // one for BB, in which case this phi node will not prevent the merging // of the block. if (Val != PN->getIncomingValueForBlock(*PI)) { - DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " << "predecessor " << (*PI)->getName() << "\n"); return false; @@ -525,7 +555,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { } } - DEBUG(errs() << "Killing Trivial BB: \n" << *BB); + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); if (isa<PHINode>(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 6b2c591..53117a0 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -63,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { if (OnlyPred->getTerminator()->getNumSuccessors() != 1) return 0; - DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred); + DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); // Resolve any PHI nodes at the start of the block. 
They are all // guaranteed to have exactly one entry if they exist, unless there are @@ -110,13 +110,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - DEBUG(errs() << " Can't unroll; loop preheader-insertion failed.\n"); + DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); return false; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { - DEBUG(errs() << " Can't unroll; loop exit-block-insertion failed.\n"); + DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); return false; } @@ -125,7 +125,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. - DEBUG(errs() << + DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } @@ -138,9 +138,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) TripMultiple = L->getSmallConstantTripMultiple(); if (TripCount != 0) - DEBUG(errs() << " Trip Count = " << TripCount << "\n"); + DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) - DEBUG(errs() << " Trip Multiple = " << TripMultiple << "\n"); + DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. @@ -166,17 +166,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) } if (CompletelyUnroll) { - DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() + DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); } else { - DEBUG(errs() << "UNROLLING loop %" << Header->getName() + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { - DEBUG(errs() << " with a breakout at trip " << BreakoutTrip); + DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { - DEBUG(errs() << " with " << TripMultiple << " trips per branch"); + DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); } - DEBUG(errs() << "!\n"); + DEBUG(dbgs() << "!\n"); } std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 6e6e8d2..766c4d9 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -255,7 +255,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { // Insert a return instruction. This really should be a "barrier", as it // is unreachable. ReturnInst::Create(F.getContext(), - F.getReturnType() == Type::getVoidTy(F.getContext()) ? + F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType()), UI); // Remove the unwind instruction now. 
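A note on the isVoidTy()/isInteger(N) rewrites that recur above and below: the old form materializes the context's singleton type object just to compare pointers, while the new form queries the type directly. A minimal sketch of the two spellings, assuming the LLVM C++ API of this era (the helper names returnsVoidOld/returnsVoidNew are invented for illustration):

#include "llvm/Function.h"
#include "llvm/Type.h"
using namespace llvm;

// Old spelling: fetch the singleton void type from the LLVMContext and
// compare pointer identity.
static bool returnsVoidOld(const Function &F) {
  return F.getReturnType() == Type::getVoidTy(F.getContext());
}

// New spelling: ask the type itself; no LLVMContext plumbing is needed
// at the call site.
static bool returnsVoidNew(const Function &F) {
  return F.getReturnType()->isVoidTy();
}

The two are equivalent because LLVM uniques primitive and integer types per context, so "is the void type" and "is the same object getVoidTy() returns" coincide; the predicate form simply reads better and drops a context lookup.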
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 743bb6e..468a5fe 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -137,12 +137,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, unsigned Mid = Size / 2; std::vector<CaseRange> LHS(Begin, Begin + Mid); - DEBUG(errs() << "LHS: " << LHS << "\n"); + DEBUG(dbgs() << "LHS: " << LHS << "\n"); std::vector<CaseRange> RHS(Begin + Mid, End); - DEBUG(errs() << "RHS: " << RHS << "\n"); + DEBUG(dbgs() << "RHS: " << RHS << "\n"); CaseRange& Pivot = *(Begin + Mid); - DEBUG(errs() << "Pivot ==> " + DEBUG(dbgs() << "Pivot ==> " << cast<ConstantInt>(Pivot.Low)->getValue() << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); @@ -306,9 +306,9 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { CaseVector Cases; unsigned numCmps = Clusterify(Cases, SI); - DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() << ". Total compares: " << numCmps << "\n"); - DEBUG(errs() << "Cases: " << Cases << "\n"); + DEBUG(dbgs() << "Cases: " << Cases << "\n"); (void)numCmps; BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 846e432..baaa130 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -448,13 +448,13 @@ void PromoteMem2Reg::run() { // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); - while (!RenamePassWorkList.empty()) { + do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); - } + } while (!RenamePassWorkList.empty()); // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 9881b3c..161bf21 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -191,7 +191,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI; } @@ -352,7 +352,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { InsertedPHI->eraseFromParent(); InsertedVal = ConstVal; } else { - DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); // If the client wants to know about all new instructions, tell it. 
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp index 1c4afff..4e813dd 100644 --- a/lib/Transforms/Utils/SSI.cpp +++ b/lib/Transforms/Utils/SSI.cpp @@ -416,7 +416,7 @@ bool SSIEverything::runOnFunction(Function &F) { for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) - if (I->getType() != Type::getVoidTy(F.getContext())) + if (!I->getType()->isVoidTy()) Insts.push_back(I); ssi.createSSI(Insts); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d7ca45e..cb53296 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -459,7 +459,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Remove PHI node entries for the dead edge. ThisCases[0].second->removePredecessor(TI->getParent()); - DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); @@ -472,7 +472,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) DeadCases.insert(PredCases[i].first); - DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); for (unsigned i = SI->getNumCases()-1; i != 0; --i) @@ -481,7 +481,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, SI->removeCase(i); } - DEBUG(errs() << "Leaving: " << *TI << "\n"); + DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } } @@ -524,7 +524,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, Instruction *NI = BranchInst::Create(TheRealDest, TI); (void) NI; - DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); @@ -753,7 +753,7 @@ HoistTerminator: // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); BIParent->getInstList().insert(BI, NT); - if (NT->getType() != Type::getVoidTy(BB1->getContext())) { + if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); NT->takeName(I1); @@ -1011,7 +1011,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType() == Type::getInt1Ty(BB->getContext())) { + CB->getType()->isInteger(1)) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -1111,7 +1111,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN) { if (NumPhis > 2) return false; - DEBUG(errs() << "FOUND IF CONDITION! " << *IfCond << " T: " + DEBUG(dbgs() << "FOUND IF CONDITION! 
" << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); // Loop over the PHI's seeing if we can promote them all to select @@ -1295,7 +1295,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { ReturnInst::Create(BI->getContext(), TrueValue, BI); (void) RI; - DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); @@ -1377,7 +1377,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { else continue; - DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { @@ -1511,7 +1511,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent() + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); @@ -1531,7 +1531,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { OtherDest = InfLoopBlock; } - DEBUG(errs() << *PBI->getParent()->getParent()); + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. @@ -1581,8 +1581,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } } - DEBUG(errs() << "INTO: " << *PBI->getParent()); - DEBUG(errs() << *PBI->getParent()->getParent()); + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); + DEBUG(dbgs() << *PBI->getParent()->getParent()); // This basic block is probably dead. We know it has at least // one fewer predecessor. @@ -1608,7 +1608,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { // Remove basic blocks that have no predecessors... or that just have themself // as a predecessor. These are unreachable. if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { - DEBUG(errs() << "Removing BB: \n" << *BB); + DEBUG(dbgs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); return true; } @@ -1651,20 +1651,13 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { if (!UncondBranchPreds.empty()) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - DEBUG(errs() << "FOLDING: " << *BB + DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); Instruction *UncondBranch = Pred->getTerminator(); // Clone the return and add it to the end of the predecessor. Instruction *NewRet = RI->clone(); Pred->getInstList().push_back(NewRet); - BasicBlock::iterator BBI = RI; - if (BBI != BB->begin()) { - // Move region end info into the predecessor. - if (DbgRegionEndInst *DREI = dyn_cast<DbgRegionEndInst>(--BBI)) - DREI->moveBefore(NewRet); - } - // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. 
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 30cb94d..3fa8b70 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -112,7 +112,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { "UnifiedReturnBlock", &F); PHINode *PN = 0; - if (F.getReturnType() == Type::getVoidTy(F.getContext())) { + if (F.getReturnType()->isVoidTy()) { ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index d3c9d77..eff2c77 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -563,11 +564,14 @@ static SlotTracker *createSlotTracker(const Value *V) { if (const Function *Func = dyn_cast<Function>(V)) return new SlotTracker(Func); + if (isa<MDNode>(V)) + return new SlotTracker((Function *)0); + return 0; } #if 0 -#define ST_DEBUG(X) errs() << X +#define ST_DEBUG(X) dbgs() << X #else #define ST_DEBUG(X) #endif @@ -614,8 +618,7 @@ void SlotTracker::processModule() { E = TheModule->named_metadata_end(); I != E; ++I) { const NamedMDNode *NMD = I; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - // FIXME: Change accessor to be type safe. - if (MDNode *MD = cast_or_null<MDNode>(NMD->getOperand(i))) + if (MDNode *MD = NMD->getOperand(i)) CreateMetadataSlot(MD); } } @@ -832,7 +835,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinting &TypePrinter, SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (CI->getType() == Type::getInt1Ty(CV->getContext())) { + if (CI->getType()->isInteger(1)) { Out << (CI->getZExtValue() ? "true" : "false"); return; } @@ -1136,6 +1139,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, return; } + if (!Machine) + Machine = createSlotTracker(V); Out << '!' << Machine->getMetadataSlot(N); return; } @@ -1369,10 +1374,10 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { Out << "!" << NMD->getName() << " = !{"; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { if (i) Out << ", "; - // FIXME: Change accessor to be typesafe. - // FIXME: This doesn't handle null?? - MDNode *MD = cast_or_null<MDNode>(NMD->getOperand(i)); - Out << '!' << Machine.getMetadataSlot(MD); + if (MDNode *MD = NMD->getOperand(i)) + Out << '!' 
<< Machine.getMetadataSlot(MD); + else + Out << "null"; } Out << "}\n"; } @@ -2057,8 +2062,9 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { else W.printAlias(cast<GlobalAlias>(GV)); } else if (const MDNode *N = dyn_cast<MDNode>(this)) { - SlotTracker SlotTable((Function*)0); - AssemblyWriter W(OS, SlotTable, 0, AAW); + Function *F = N->getFunction(); + SlotTracker SlotTable(F); + AssemblyWriter W(OS, SlotTable, getModuleFromVal(F), AAW); W.printMDNodeBody(N); } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) { SlotTracker SlotTable(N->getParent()); @@ -2085,17 +2091,17 @@ void Value::printCustom(raw_ostream &OS) const { } // Value::dump - allow easy printing of Values from the debugger. -void Value::dump() const { print(errs()); errs() << '\n'; } +void Value::dump() const { print(dbgs()); dbgs() << '\n'; } // Type::dump - allow easy printing of Types from the debugger. // This one uses type names from the given context module void Type::dump(const Module *Context) const { - WriteTypeSymbolic(errs(), this, Context); - errs() << '\n'; + WriteTypeSymbolic(dbgs(), this, Context); + dbgs() << '\n'; } // Type::dump - allow easy printing of Types from the debugger. void Type::dump() const { dump(0); } // Module::dump() - Allow printing of Modules from the debugger. -void Module::dump() const { print(errs(), 0); } +void Module::dump() const { print(dbgs(), 0); } diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index d68bba3..a371c6f 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/System/Atomic.h" #include "llvm/System/Mutex.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -318,11 +319,11 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const { } void AttrListPtr::dump() const { - errs() << "PAL[ "; + dbgs() << "PAL[ "; for (unsigned i = 0; i < getNumSlots(); ++i) { const AttributeWithIndex &PAWI = getSlot(i); - errs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} "; + dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} "; } - errs() << "]\n"; + dbgs() << "]\n"; } diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 77ab19f..2161841 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -480,61 +480,42 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) { } } -/// This function checks debug info intrinsics. If an intrinsic is invalid -/// then this function simply removes the intrinsic. +/// This function strips all debug info intrinsics, except for llvm.dbg.declare. +/// If an llvm.dbg.declare intrinsic is invalid, then this function simply +/// strips that use. 
void llvm::CheckDebugInfoIntrinsics(Module *M) { if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) { - if (!FuncStart->use_empty()) { - DbgFuncStartInst *DFSI = cast<DbgFuncStartInst>(FuncStart->use_back()); - if (!isa<MDNode>(DFSI->getOperand(1))) { - while (!FuncStart->use_empty()) { - CallInst *CI = cast<CallInst>(FuncStart->use_back()); - CI->eraseFromParent(); - } - FuncStart->eraseFromParent(); - } + while (!FuncStart->use_empty()) { + CallInst *CI = cast<CallInst>(FuncStart->use_back()); + CI->eraseFromParent(); } + FuncStart->eraseFromParent(); } - + if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) { - if (!StopPoint->use_empty()) { - DbgStopPointInst *DSPI = cast<DbgStopPointInst>(StopPoint->use_back()); - if (!isa<MDNode>(DSPI->getOperand(3))) { - while (!StopPoint->use_empty()) { - CallInst *CI = cast<CallInst>(StopPoint->use_back()); - CI->eraseFromParent(); - } - StopPoint->eraseFromParent(); - } + while (!StopPoint->use_empty()) { + CallInst *CI = cast<CallInst>(StopPoint->use_back()); + CI->eraseFromParent(); } + StopPoint->eraseFromParent(); } if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) { - if (!RegionStart->use_empty()) { - DbgRegionStartInst *DRSI = cast<DbgRegionStartInst>(RegionStart->use_back()); - if (!isa<MDNode>(DRSI->getOperand(1))) { - while (!RegionStart->use_empty()) { - CallInst *CI = cast<CallInst>(RegionStart->use_back()); - CI->eraseFromParent(); - } - RegionStart->eraseFromParent(); - } + while (!RegionStart->use_empty()) { + CallInst *CI = cast<CallInst>(RegionStart->use_back()); + CI->eraseFromParent(); } + RegionStart->eraseFromParent(); } if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) { - if (!RegionEnd->use_empty()) { - DbgRegionEndInst *DREI = cast<DbgRegionEndInst>(RegionEnd->use_back()); - if (!isa<MDNode>(DREI->getOperand(1))) { - while (!RegionEnd->use_empty()) { - CallInst *CI = cast<CallInst>(RegionEnd->use_back()); - CI->eraseFromParent(); - } - RegionEnd->eraseFromParent(); - } + while (!RegionEnd->use_empty()) { + CallInst *CI = cast<CallInst>(RegionEnd->use_back()); + CI->eraseFromParent(); } + RegionEnd->eraseFromParent(); } if (Function *Declare = M->getFunction("llvm.dbg.declare")) { diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 2449739..3a24389 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -1162,7 +1162,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, } // i1 can be simplified in many cases. - if (C1->getType() == Type::getInt1Ty(Context)) { + if (C1->getType()->isInteger(1)) { switch (Opcode) { case Instruction::Add: case Instruction::Sub: @@ -1229,10 +1229,10 @@ static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, // Ok, we have two differing integer indices. Sign extend them to be the same // type. Long is always big enough, so we use it. - if (C1->getType() != Type::getInt64Ty(Context)) + if (!C1->getType()->isInteger(64)) C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context)); - if (C2->getType() != Type::getInt64Ty(Context)) + if (!C2->getType()->isInteger(64)) C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context)); if (C1 == C2) return 0; // They are equal @@ -1587,7 +1587,7 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, } // If the comparison is a comparison between two i1's, simplify it. 
- if (C1->getType() == Type::getInt1Ty(Context)) { + if (C1->getType()->isInteger(1)) { switch(pred) { case ICmpInst::ICMP_EQ: if (isa<ConstantInt>(C2)) @@ -2042,10 +2042,10 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, // Before adding, extend both operands to i64 to avoid // overflow trouble. - if (PrevIdx->getType() != Type::getInt64Ty(Context)) + if (!PrevIdx->getType()->isInteger(64)) PrevIdx = ConstantExpr::getSExt(PrevIdx, Type::getInt64Ty(Context)); - if (Div->getType() != Type::getInt64Ty(Context)) + if (!Div->getType()->isInteger(64)) Div = ConstantExpr::getSExt(Div, Type::getInt64Ty(Context)); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index e3c6144..cc8961f 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -110,7 +110,7 @@ void Constant::destroyConstantImpl() { Value *V = use_back(); #ifndef NDEBUG // Only in -g mode... if (!isa<Constant>(V)) { - errs() << "While deleting: " << *this + dbgs() << "While deleting: " << *this << "\n\nUse still stuck around after Def is destroyed: " << *V << "\n\n"; } @@ -197,6 +197,24 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { if (const BlockAddress *BA = dyn_cast<BlockAddress>(this)) return BA->getFunction()->getRelocationInfo(); + // While raw uses of blockaddress need to be relocated, differences between + // two of them don't when they are for labels in the same function. This is a + // common idiom when creating a table for the indirect goto extension, so we + // handle it efficiently here. + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this)) + if (CE->getOpcode() == Instruction::Sub) { + ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0)); + ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1)); + if (LHS && RHS && + LHS->getOpcode() == Instruction::PtrToInt && + RHS->getOpcode() == Instruction::PtrToInt && + isa<BlockAddress>(LHS->getOperand(0)) && + isa<BlockAddress>(RHS->getOperand(0)) && + cast<BlockAddress>(LHS->getOperand(0))->getFunction() == + cast<BlockAddress>(RHS->getOperand(0))->getFunction()) + return NoRelocation; + } + PossibleRelocationsTy Result = NoRelocation; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) Result = std::max(Result, @@ -910,7 +928,7 @@ void ConstantArray::destroyConstant() { /// if the elements of the array are all ConstantInt's. bool ConstantArray::isString() const { // Check the element type for i8... - if (getType()->getElementType() != Type::getInt8Ty(getContext())) + if (!getType()->getElementType()->isInteger(8)) return false; // Check the elements to make sure they are all integers, not constant // expressions. @@ -925,7 +943,7 @@ bool ConstantArray::isString() const { /// null bytes except its terminator. bool ConstantArray::isCString() const { // Check the element type for i8... - if (getType()->getElementType() != Type::getInt8Ty(getContext())) + if (!getType()->getElementType()->isInteger(8)) return false; // Last element must be a null. 
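The getRelocationInfo() hunk above treats ptrtoint(blockaddress) - ptrtoint(blockaddress) within one function as relocation-free, and the diff itself names the motivating idiom: relative dispatch tables for the indirect-goto extension. A hedged sketch of the kind of source that produces such constants, relying on the GCC/Clang computed-goto and label-difference extensions:

// Offsets between labels of the same function are fixed at link time,
// so this table needs no relocations and can live in read-only memory.
static int dispatch(int op) {
  static const long Offsets[] = {
    &&do_add - &&do_add, // opcode 0
    &&do_sub - &&do_add, // opcode 1
  };
  goto *(&&do_add + Offsets[op]);
do_add: return +1;
do_sub: return -1;
}

Without the special case, each blockaddress operand would conservatively be treated as needing a relocation even though the subtraction cancels the relocatable parts and leaves a link-time constant.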
@@ -1671,7 +1689,7 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { assert(isa<VectorType>(Val->getType()) && "Tried to create extractelement operation on non-vector type!"); - assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) && + assert(Idx->getType()->isInteger(32) && "Extractelement index must be i32 type!"); return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(), Val, Idx); @@ -1698,7 +1716,7 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, "Tried to create insertelement operation on non-vector type!"); assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType() && "Insertelement types must match!"); - assert(Idx->getType() == Type::getInt32Ty(Val->getContext()) && + assert(Idx->getType()->isInteger(32) && "Insertelement index must be i32 type!"); return getInsertElementTy(Val->getType(), Val, Elt, Idx); } diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 268a660..08224e4 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -764,7 +764,7 @@ public: } void dump() const { - DEBUG(errs() << "Constant.cpp: ConstantUniqueMap\n"); + DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n"); } }; diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 449e967..984d245 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -89,7 +89,7 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple) { } /*--.. Type names ..........................................................--*/ -int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) { +LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) { return unwrap(M)->addTypeName(Name, unwrap(Ty)); } @@ -237,7 +237,7 @@ LLVMTypeRef LLVMPPCFP128Type(void) { LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, LLVMTypeRef *ParamTypes, unsigned ParamCount, - int IsVarArg) { + LLVMBool IsVarArg) { std::vector<const Type*> Tys; for (LLVMTypeRef *I = ParamTypes, *E = ParamTypes + ParamCount; I != E; ++I) Tys.push_back(unwrap(*I)); @@ -245,7 +245,7 @@ LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0)); } -int LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) { +LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) { return unwrap<FunctionType>(FunctionTy)->isVarArg(); } @@ -267,7 +267,7 @@ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) { /*--.. 
Operations on struct types ..........................................--*/ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, - unsigned ElementCount, int Packed) { + unsigned ElementCount, LLVMBool Packed) { std::vector<const Type*> Tys; for (LLVMTypeRef *I = ElementTypes, *E = ElementTypes + ElementCount; I != E; ++I) @@ -277,7 +277,7 @@ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, } LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, - unsigned ElementCount, int Packed) { + unsigned ElementCount, LLVMBool Packed) { return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes, ElementCount, Packed); } @@ -294,7 +294,7 @@ void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) { *Dest++ = wrap(*I); } -int LLVMIsPackedStruct(LLVMTypeRef StructTy) { +LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } @@ -442,17 +442,17 @@ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) { return wrap(UndefValue::get(unwrap(Ty))); } -int LLVMIsConstant(LLVMValueRef Ty) { +LLVMBool LLVMIsConstant(LLVMValueRef Ty) { return isa<Constant>(unwrap(Ty)); } -int LLVMIsNull(LLVMValueRef Val) { +LLVMBool LLVMIsNull(LLVMValueRef Val) { if (Constant *C = dyn_cast<Constant>(unwrap(Val))) return C->isNullValue(); return false; } -int LLVMIsUndef(LLVMValueRef Val) { +LLVMBool LLVMIsUndef(LLVMValueRef Val) { return isa<UndefValue>(unwrap(Val)); } @@ -464,7 +464,7 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) { /*--.. Operations on scalar constants ......................................--*/ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, - int SignExtend) { + LLVMBool SignExtend) { return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0)); } @@ -504,7 +504,8 @@ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) { /*--.. Operations on composite constants ...................................--*/ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, - unsigned Length, int DontNullTerminate) { + unsigned Length, + LLVMBool DontNullTerminate) { /* Inverted the sense of AddNull because ', 0)' is a better mnemonic for null termination than ', 1)'. 
*/ return wrap(ConstantArray::get(*unwrap(C), std::string(Str, Length), @@ -512,14 +513,14 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, } LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, - unsigned Count, int Packed) { + unsigned Count, LLVMBool Packed) { return wrap(ConstantStruct::get(*unwrap(C), unwrap<Constant>(ConstantVals, Count), Count, Packed != 0)); } LLVMValueRef LLVMConstString(const char *Str, unsigned Length, - int DontNullTerminate) { + LLVMBool DontNullTerminate) { return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length, DontNullTerminate); } @@ -530,7 +531,7 @@ LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, Length)); } LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, - int Packed) { + LLVMBool Packed) { return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count, Packed); } @@ -820,7 +821,7 @@ LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal, } LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType, - unsigned isSigned) { + LLVMBool isSigned) { return wrap(ConstantExpr::getIntegerCast( unwrap<Constant>(ConstantVal), unwrap(ToType), @@ -883,10 +884,11 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, IdxList, NumIdx)); } -LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, - const char *Constraints, int HasSideEffects, - int IsAlignStack) { - return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString, +LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, + const char *Constraints, + LLVMBool HasSideEffects, + LLVMBool IsAlignStack) { + return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString, Constraints, HasSideEffects, IsAlignStack)); } @@ -896,7 +898,7 @@ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) { return wrap(unwrap<GlobalValue>(Global)->getParent()); } -int LLVMIsDeclaration(LLVMValueRef Global) { +LLVMBool LLVMIsDeclaration(LLVMValueRef Global) { return unwrap<GlobalValue>(Global)->isDeclaration(); } @@ -1079,19 +1081,19 @@ void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) { ->setInitializer(unwrap<Constant>(ConstantVal)); } -int LLVMIsThreadLocal(LLVMValueRef GlobalVar) { +LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) { return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal(); } -void LLVMSetThreadLocal(LLVMValueRef GlobalVar, int IsThreadLocal) { +void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) { unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0); } -int LLVMIsGlobalConstant(LLVMValueRef GlobalVar) { +LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) { return unwrap<GlobalVariable>(GlobalVar)->isConstant(); } -void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, int IsConstant) { +void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) { unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0); } @@ -1285,7 +1287,7 @@ LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) { return wrap(static_cast<Value*>(unwrap(BB))); } -int LLVMValueIsBasicBlock(LLVMValueRef Val) { +LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) { return isa<BasicBlock>(unwrap(Val)); } @@ -1452,11 +1454,11 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, /*--.. 
Operations on call instructions (only) ..............................--*/ -int LLVMIsTailCall(LLVMValueRef Call) { +LLVMBool LLVMIsTailCall(LLVMValueRef Call) { return unwrap<CallInst>(Call)->isTailCall(); } -void LLVMSetTailCall(LLVMValueRef Call, int isTailCall) { +void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) { unwrap<CallInst>(Call)->setTailCall(isTailCall); } @@ -1973,9 +1975,11 @@ void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) { /*===-- Memory buffers ----------------------------------------------------===*/ -int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, - LLVMMemoryBufferRef *OutMemBuf, - char **OutMessage) { +LLVMBool LLVMCreateMemoryBufferWithContentsOfFile( + const char *Path, + LLVMMemoryBufferRef *OutMemBuf, + char **OutMessage) { + std::string Error; if (MemoryBuffer *MB = MemoryBuffer::getFile(Path, &Error)) { *OutMemBuf = wrap(MB); @@ -1986,8 +1990,8 @@ int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, return 1; } -int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, - char **OutMessage) { +LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, + char **OutMessage) { MemoryBuffer *MB = MemoryBuffer::getSTDIN(); if (!MB->getBufferSize()) { delete MB; diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index e04b6d6..f00f6ee 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -189,7 +189,7 @@ void Function::BuildLazyArguments() const { // Create the arguments vector, all arguments start out unnamed. const FunctionType *FT = getFunctionType(); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { - assert(FT->getParamType(i) != Type::getVoidTy(FT->getContext()) && + assert(!FT->getParamType(i)->isVoidTy() && "Cannot have void typed arguments!"); ArgumentList.push_back(new Argument(FT->getParamType(i))); } diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index 16de1af..ec21773 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -217,7 +217,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) { switch (NumOutputs) { case 0: - if (Ty->getReturnType() != Type::getVoidTy(Ty->getContext())) return false; + if (!Ty->getReturnType()->isVoidTy()) return false; break; case 1: if (isa<StructType>(Ty->getReturnType())) return false; diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index a5500e6..3fabfd0 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -374,37 +374,6 @@ bool Instruction::isCommutative(unsigned op) { } } -// Code here matches isMalloc from MemoryBuiltins, which is not in VMCore. -static bool isMalloc(const Value* I) { - const CallInst *CI = dyn_cast<CallInst>(I); - if (!CI) { - const BitCastInst *BCI = dyn_cast<BitCastInst>(I); - if (!BCI) return false; - - CI = dyn_cast<CallInst>(BCI->getOperand(0)); - } - - if (!CI) - return false; - Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc") - return false; - - // Check malloc prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin - // attribute will exist. 
- const FunctionType *FTy = Callee->getFunctionType(); - if (FTy->getNumParams() != 1) - return false; - if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) { - if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64) - return false; - return true; - } - - return false; -} - bool Instruction::isSafeToSpeculativelyExecute() const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast<Constant>(getOperand(i))) @@ -430,7 +399,9 @@ bool Instruction::isSafeToSpeculativelyExecute() const { case Load: { if (cast<LoadInst>(this)->isVolatile()) return false; - if (isa<AllocaInst>(getOperand(0)) || isMalloc(getOperand(0))) + // Note that it is not safe to speculate into a malloc'd region because + // malloc may return null. + if (isa<AllocaInst>(getOperand(0))) return true; if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0))) return !GV->hasExternalWeakLinkage(); diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 3e9950e..2619047 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -523,8 +523,7 @@ static Instruction *createMalloc(Instruction *InsertBefore, MCall->setCallingConv(F->getCallingConv()); if (!F->doesNotAlias(0)) F->setDoesNotAlias(0); } - assert(MCall->getType() != Type::getVoidTy(BB->getContext()) && - "Malloc has void return type"); + assert(!MCall->getType()->isVoidTy() && "Malloc has void return type"); return Result; } @@ -788,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { void BranchInst::AssertOK() { if (isConditional()) - assert(getCondition()->getType() == Type::getInt1Ty(getContext()) && + assert(getCondition()->getType()->isInteger(1) && "May only branch on boolean predicates!"); } @@ -893,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { else { assert(!isa<BasicBlock>(Amt) && "Passed basic block into allocation size parameter! 
Use other ctor"); - assert(Amt->getType() == Type::getInt32Ty(Context) && + assert(Amt->getType()->isInteger(32) && "Allocation array size is not a 32-bit integer!"); } return Amt; @@ -904,7 +903,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -913,7 +912,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -922,7 +921,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), 0), InsertBefore) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -931,7 +930,7 @@ AllocaInst::AllocaInst(const Type *Ty, const Twine &Name, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), 0), InsertAtEnd) { setAlignment(0); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -940,7 +939,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore) { setAlignment(Align); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -949,7 +948,7 @@ AllocaInst::AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd) { setAlignment(Align); - assert(Ty != Type::getVoidTy(Ty->getContext()) && "Cannot allocate void!"); + assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -1392,8 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { - if (!isa<VectorType>(Val->getType()) || - Index->getType() != Type::getInt32Ty(Val->getContext())) + if (!isa<VectorType>(Val->getType()) || !Index->getType()->isInteger(32)) return false; return true; } @@ -1440,7 +1438,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - if (Index->getType() != Type::getInt32Ty(Vec->getContext())) + if (!Index->getType()->isInteger(32)) return false; // Third operand of insertelement must be i32. 
return true; } @@ -1492,7 +1490,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (!isa<Constant>(Mask) || MaskTy == 0 || - MaskTy->getElementType() != Type::getInt32Ty(V1->getContext())) + !MaskTy->getElementType()->isInteger(32)) return false; return true; } @@ -2287,7 +2285,8 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); + assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + "Invalid integer cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp index 5e0f42e..cb9252e 100644 --- a/lib/VMCore/IntrinsicInst.cpp +++ b/lib/VMCore/IntrinsicInst.cpp @@ -8,11 +8,7 @@ //===----------------------------------------------------------------------===// // // This file implements methods that make it really easy to deal with intrinsic -// functions with the isa/dyncast family of functions. In particular, this -// allows you to do things like: -// -// if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(Inst)) -// ... SPI->getFileName() ... SPI->getDirectory() ... +// functions. // // All intrinsic function calls are instances of the call instruction, so these // are all subclasses of the CallInst class. Note that none of these classes @@ -55,25 +51,13 @@ Value *DbgInfoIntrinsic::StripCast(Value *C) { } //===----------------------------------------------------------------------===// -/// DbgStopPointInst - This represents the llvm.dbg.stoppoint instruction. +/// DbgValueInst - This represents the llvm.dbg.value instruction. /// -Value *DbgStopPointInst::getFileName() const { - // Once the operand indices are verified, update this assert - assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices"); - return getContext()->getOperand(3); -} - -Value *DbgStopPointInst::getDirectory() const { - // Once the operand indices are verified, update this assert - assert(LLVMDebugVersion == (7 << 16) && "Verify operand indices"); - return getContext()->getOperand(4); +const Value *DbgValueInst::getValue() const { + return cast<MDNode>(getOperand(1))->getOperand(0); } -//===----------------------------------------------------------------------===// -/// DbgValueInst - This represents the llvm.dbg.value instruction. -/// - -Value *DbgValueInst::getValue() const { +Value *DbgValueInst::getValue() { return cast<MDNode>(getOperand(1))->getOperand(0); } diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp index 33eb044..7d9f330 100644 --- a/lib/VMCore/Mangler.cpp +++ b/lib/VMCore/Mangler.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -24,57 +24,57 @@ static char HexDigit(int V) { return V < 10 ? 
V+'0' : V+'A'-10; } -static std::string MangleLetter(unsigned char C) { - char Result[] = { '_', HexDigit(C >> 4), HexDigit(C & 15), '_', 0 }; - return Result; +static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) { + OutName.push_back('_'); + OutName.push_back(HexDigit(C >> 4)); + OutName.push_back(HexDigit(C & 15)); + OutName.push_back('_'); } /// makeNameProper - We don't want identifier names non-C-identifier characters /// in them, so mangle them as appropriate. /// -std::string Mangler::makeNameProper(const std::string &X, - ManglerPrefixTy PrefixTy) { +/// FIXME: This is deprecated, new code should use getNameWithPrefix and use +/// MCSymbol printing to handle quotes or not etc. +/// +void Mangler::makeNameProper(SmallVectorImpl<char> &OutName, + const Twine &TheName, + ManglerPrefixTy PrefixTy) { + SmallString<256> TmpData; + StringRef X = TheName.toStringRef(TmpData); assert(!X.empty() && "Cannot mangle empty strings"); if (!UseQuotes) { - std::string Result; - // If X does not start with (char)1, add the prefix. - bool NeedPrefix = true; - std::string::const_iterator I = X.begin(); + StringRef::iterator I = X.begin(); if (*I == 1) { - NeedPrefix = false; - ++I; // Skip over the marker. + ++I; // Skip over the no-prefix marker. + } else { + if (PrefixTy == Mangler::Private) + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); + else if (PrefixTy == Mangler::LinkerPrivate) + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); + OutName.append(Prefix, Prefix+strlen(Prefix)); } // Mangle the first letter specially, don't allow numbers unless the target // explicitly allows them. if (!SymbolsCanStartWithDigit && *I >= '0' && *I <= '9') - Result += MangleLetter(*I++); + MangleLetter(OutName, *I++); - for (std::string::const_iterator E = X.end(); I != E; ++I) { + for (StringRef::iterator E = X.end(); I != E; ++I) { if (!isCharAcceptable(*I)) - Result += MangleLetter(*I); + MangleLetter(OutName, *I); else - Result += *I; - } - - if (NeedPrefix) { - Result = Prefix + Result; - - if (PrefixTy == Mangler::Private) - Result = PrivatePrefix + Result; - else if (PrefixTy == Mangler::LinkerPrivate) - Result = LinkerPrivatePrefix + Result; + OutName.push_back(*I); } - - return Result; + return; } bool NeedPrefix = true; bool NeedQuotes = false; - std::string Result; - std::string::const_iterator I = X.begin(); + StringRef::iterator I = X.begin(); if (*I == 1) { NeedPrefix = false; ++I; // Skip over the marker. @@ -87,7 +87,7 @@ std::string Mangler::makeNameProper(const std::string &X, // Do an initial scan of the string, checking to see if we need quotes or // to escape a '"' or not. if (!NeedQuotes) - for (std::string::const_iterator E = X.end(); I != E; ++I) + for (StringRef::iterator E = X.end(); I != E; ++I) if (!isCharAcceptable(*I)) { NeedQuotes = true; break; @@ -95,43 +95,57 @@ std::string Mangler::makeNameProper(const std::string &X, // In the common case, we don't need quotes. Handle this quickly. if (!NeedQuotes) { - if (!NeedPrefix) - return X.substr(1); // Strip off the \001. - - Result = Prefix + X; + if (!NeedPrefix) { + OutName.append(X.begin()+1, X.end()); // Strip off the \001. 
+ return; + } if (PrefixTy == Mangler::Private) - Result = PrivatePrefix + Result; + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); else if (PrefixTy == Mangler::LinkerPrivate) - Result = LinkerPrivatePrefix + Result; - - return Result; - } - - if (NeedPrefix) - Result = X.substr(0, I-X.begin()); + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); - // Otherwise, construct the string the expensive way. - for (std::string::const_iterator E = X.end(); I != E; ++I) { - if (*I == '"') - Result += "_QQ_"; - else if (*I == '\n') - Result += "_NL_"; + if (Prefix[0] == 0) + ; // Common noop, no prefix. + else if (Prefix[1] == 0) + OutName.push_back(Prefix[0]); // Common, one character prefix. else - Result += *I; + OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary prefix. + OutName.append(X.begin(), X.end()); + return; } + // Add leading quote. + OutName.push_back('"'); + + // Add prefixes unless disabled. if (NeedPrefix) { - Result = Prefix + Result; - if (PrefixTy == Mangler::Private) - Result = PrivatePrefix + Result; + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); else if (PrefixTy == Mangler::LinkerPrivate) - Result = LinkerPrivatePrefix + Result; + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); + OutName.append(Prefix, Prefix+strlen(Prefix)); + } + + // Add the piece that we already scanned through. + OutName.append(X.begin()+!NeedPrefix, I); + + // Otherwise, construct the string the expensive way. + for (StringRef::iterator E = X.end(); I != E; ++I) { + if (*I == '"') { + const char *Quote = "_QQ_"; + OutName.append(Quote, Quote+4); + } else if (*I == '\n') { + const char *Newline = "_NL_"; + OutName.append(Newline, Newline+4); + } else + OutName.push_back(*I); } - Result = '"' + Result + '"'; - return Result; + // Add trailing quote. + OutName.push_back('"'); } /// getMangledName - Returns the mangled name of V, an LLVM Value, @@ -139,6 +153,9 @@ std::string Mangler::makeNameProper(const std::string &X, /// specified suffix. If 'ForcePrivate' is specified, the label is specified /// to have a private label prefix. /// +/// FIXME: This is deprecated, new code should use getNameWithPrefix and use +/// MCSymbol printing to handle quotes or not etc. +/// std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, bool ForcePrivate) { assert((!isa<Function>(GV) || !cast<Function>(GV)->isIntrinsic()) && @@ -148,8 +165,11 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, (GV->hasPrivateLinkage() || ForcePrivate) ? Mangler::Private : GV->hasLinkerPrivateLinkage() ? Mangler::LinkerPrivate : Mangler::Default; - if (GV->hasName()) - return makeNameProper(GV->getNameStr() + Suffix, PrefixTy); + SmallString<128> Result; + if (GV->hasName()) { + makeNameProper(Result, GV->getNameStr() + Suffix, PrefixTy); + return Result.str().str(); + } // Get the ID for the global, assigning a new one if we haven't got one // already. @@ -157,7 +177,38 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, if (ID == 0) ID = NextAnonGlobalID++; // Must mangle the global into a unique ID. - return makeNameProper("__unnamed_" + utostr(ID) + Suffix, PrefixTy); + makeNameProper(Result, "__unnamed_" + utostr(ID) + Suffix, PrefixTy); + return Result.str().str(); +} + +/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix +/// and the specified name as the global variable name. 
GVName must not be +/// empty. +void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, + const Twine &GVName, ManglerPrefixTy PrefixTy) { + SmallString<256> TmpData; + StringRef Name = GVName.toStringRef(TmpData); + assert(!Name.empty() && "getNameWithPrefix requires non-empty name"); + + // If the global name is not led with \1, add the appropriate prefixes. + if (Name[0] != '\1') { + if (PrefixTy == Mangler::Private) + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); + else if (PrefixTy == Mangler::LinkerPrivate) + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix)); + + if (Prefix[0] == 0) + ; // Common noop, no prefix. + else if (Prefix[1] == 0) + OutName.push_back(Prefix[0]); // Common, one character prefix. + else + OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary prefix. + } else { + Name = Name.substr(1); + } + + OutName.append(Name.begin(), Name.end()); } @@ -167,33 +218,28 @@ std::string Mangler::getMangledName(const GlobalValue *GV, const char *Suffix, void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV, bool isImplicitlyPrivate) { - - // If the global is anonymous or not led with \1, then add the appropriate - // prefix. - if (!GV->hasName() || GV->getName()[0] != '\1') { + // If this global has a name, handle it simply. + if (GV->hasName()) { + ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) - OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); + PrefixTy = Mangler::Private; else if (GV->hasLinkerPrivateLinkage()) - OutName.append(LinkerPrivatePrefix, - LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));; - OutName.append(Prefix, Prefix+strlen(Prefix)); - } - - // If the global has a name, just append it now. - if (GV->hasName()) { - StringRef Name = GV->getName(); + PrefixTy = Mangler::LinkerPrivate; - // Strip off the prefix marker if present. - if (Name[0] != '\1') - OutName.append(Name.begin(), Name.end()); - else - OutName.append(Name.begin()+1, Name.end()); - return; + return getNameWithPrefix(OutName, GV->getName(), PrefixTy); } // If the global variable doesn't have a name, return a unique name for the // global based on a numbering. + // Anonymous names always get prefixes. + if (GV->hasPrivateLinkage() || isImplicitlyPrivate) + OutName.append(PrivatePrefix, PrivatePrefix+strlen(PrivatePrefix)); + else if (GV->hasLinkerPrivateLinkage()) + OutName.append(LinkerPrivatePrefix, + LinkerPrivatePrefix+strlen(LinkerPrivatePrefix));; + OutName.append(Prefix, Prefix+strlen(Prefix)); + // Get the ID for the global, assigning a new one if we haven't got one // already. 
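Note: the Mangler rewrite above switches from returning std::string to appending into a caller-provided SmallVectorImpl<char>, so the common path never heap-allocates. Typical use of the getNameWithPrefix entry point, matching the signatures in this hunk; the header path and wrapper are assumptions about this era's tree:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Mangler.h" // assumed location in this tree
    using namespace llvm;

    static std::string mangledFor(Mangler &Mang, const GlobalValue *GV) {
      SmallString<128> Name;           // stack storage in the common case
      Mang.getNameWithPrefix(Name, GV, /*isImplicitlyPrivate=*/false);
      return Name.str().str();         // copy only at the API boundary
    }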
unsigned &ID = AnonGlobalIDs[GV]; diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 8e9aab9..7988b44 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -18,6 +18,7 @@ #include "llvm/Instruction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/SmallString.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/Support/ValueHandle.h" using namespace llvm; @@ -31,7 +32,7 @@ MDString::MDString(LLVMContext &C, StringRef S) MDString *MDString::get(LLVMContext &Context, StringRef Str) { LLVMContextImpl *pImpl = Context.pImpl; - StringMapEntry<MDString *> &Entry = + StringMapEntry<MDString *> &Entry = pImpl->MDStringCache.GetOrCreateValue(Str); MDString *&S = Entry.getValue(); if (!S) S = new MDString(Context, Entry.getKey()); @@ -40,7 +41,7 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) { MDString *MDString::get(LLVMContext &Context, const char *Str) { LLVMContextImpl *pImpl = Context.pImpl; - StringMapEntry<MDString *> &Entry = + StringMapEntry<MDString *> &Entry = pImpl->MDStringCache.GetOrCreateValue(Str ? StringRef(Str) : StringRef()); MDString *&S = Entry.getValue(); if (!S) S = new MDString(Context, Entry.getKey()); @@ -58,11 +59,11 @@ class MDNodeOperand : public CallbackVH { public: MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {} ~MDNodeOperand() {} - + void set(Value *V) { setValPtr(V); } - + virtual void deleted(); virtual void allUsesReplacedWith(Value *NV); }; @@ -94,7 +95,7 @@ MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, bool isFunctionLocal) : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { NumOperands = NumVals; - + if (isFunctionLocal) setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit); @@ -107,19 +108,82 @@ MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, /// ~MDNode - Destroy MDNode. MDNode::~MDNode() { - assert((getSubclassDataFromValue() & DestroyFlag) != 0 && + assert((getSubclassDataFromValue() & DestroyFlag) != 0 && "Not being destroyed through destroy()?"); if (!isNotUniqued()) { LLVMContextImpl *pImpl = getType()->getContext().pImpl; pImpl->MDNodeSet.RemoveNode(this); } - + // Destroy the operands. for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands; Op != E; ++Op) Op->~MDNodeOperand(); } +#ifndef NDEBUG +static Function *assertLocalFunction(const MDNode *N, + SmallPtrSet<const MDNode *, 32> &Visited) { + Function *F = NULL; + // Only visit each MDNode once. + if (!Visited.insert(N)) return F; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + Value *V = N->getOperand(i); + Function *NewF = NULL; + if (!V) continue; + if (Instruction *I = dyn_cast<Instruction>(V)) + NewF = I->getParent()->getParent(); + else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) + NewF = BB->getParent(); + else if (Argument *A = dyn_cast<Argument>(V)) + NewF = A->getParent(); + else if (MDNode *MD = dyn_cast<MDNode>(V)) + if (MD->isFunctionLocal()) + NewF = assertLocalFunction(MD, Visited); + if (F && NewF) assert(F == NewF && "inconsistent function-local metadata"); + if (!F) F = NewF; + } + return F; +} +#endif + +static Function *getFunctionHelper(const MDNode *N, + SmallPtrSet<const MDNode *, 32> &Visited) { + assert(N->isFunctionLocal() && "Should only be called on function-local MD"); +#ifndef NDEBUG + return assertLocalFunction(N, Visited); +#endif + Function *F = NULL; + // Only visit each MDNode once. 
+ if (!Visited.insert(N)) return F; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + Value *V = N->getOperand(i); + if (!V) continue; + if (Instruction *I = dyn_cast<Instruction>(V)) + F = I->getParent()->getParent(); + else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) + F = BB->getParent(); + else if (Argument *A = dyn_cast<Argument>(V)) + F = A->getParent(); + else if (MDNode *MD = dyn_cast<MDNode>(V)) + if (MD->isFunctionLocal()) + F = getFunctionHelper(MD, Visited); + if (F) break; + } + return F; +} + +// getFunction - If this metadata is function-local and recursively has a +// function-local operand, return the first such operand's parent function. +// Otherwise, return null. +Function *MDNode::getFunction() const { + if (!isFunctionLocal()) return NULL; + SmallPtrSet<const MDNode *, 32> Visited; + return getFunctionHelper(this, Visited); +} + // destroy - Delete this node. Only when there are no uses. void MDNode::destroy() { setValueSubclassData(getSubclassDataFromValue() | DestroyFlag); @@ -128,9 +192,8 @@ void MDNode::destroy() { free(this); } - -MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals, - bool isFunctionLocal) { +MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, + unsigned NumVals, FunctionLocalness FL) { LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; for (unsigned i = 0; i != NumVals; ++i) @@ -139,16 +202,46 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals, void *InsertPoint; MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); if (!N) { + bool isFunctionLocal = false; + switch (FL) { + case FL_Unknown: + for (unsigned i = 0; i != NumVals; ++i) { + Value *V = Vals[i]; + if (!V) continue; + if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || + (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) { + isFunctionLocal = true; + break; + } + } + break; + case FL_No: + isFunctionLocal = false; + break; + case FL_Yes: + isFunctionLocal = true; + break; + } + // Coallocate space for the node and Operands together, then placement new. void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); - + // InsertPoint will have been set by the FindNodeOrInsertPos call. pImpl->MDNodeSet.InsertNode(N, InsertPoint); } return N; } +MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { + return getMDNode(Context, Vals, NumVals, FL_Unknown); +} + +MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value*const* Vals, + unsigned NumVals, bool isFunctionLocal) { + return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No); +} + /// getOperand - Return specified operand. Value *MDNode::getOperand(unsigned i) const { return *getOperandPtr(const_cast<MDNode*>(this), i); @@ -163,7 +256,7 @@ void MDNode::Profile(FoldingSetNodeID &ID) const { // Replace value from this node's operand list. void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { Value *From = *Op; - + if (From == To) return; @@ -173,7 +266,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { // If this node is already not being uniqued (because one of the operands // already went to null), then there is nothing else to do here. if (isNotUniqued()) return; - + LLVMContextImpl *pImpl = getType()->getContext().pImpl; // Remove "this" from the context map. 
FoldingSet doesn't have to reprofile @@ -187,7 +280,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { setIsNotUniqued(); return; } - + // Now that the node is out of the folding set, get ready to reinsert it. // First, check to see if another node with the same operands already exists // in the set. If it doesn't exist, this returns the position to insert it. @@ -210,21 +303,40 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { //===----------------------------------------------------------------------===// // NamedMDNode implementation. // -static SmallVector<TrackingVH<MetadataBase>, 4> &getNMDOps(void *Operands) { - return *(SmallVector<TrackingVH<MetadataBase>, 4>*)Operands; + +namespace llvm { +// SymbolTableListTraits specialization for MDSymbolTable. +void ilist_traits<NamedMDNode> +::addNodeToList(NamedMDNode *N) { + assert(N->getParent() == 0 && "Value already in a container!!"); + Module *Owner = getListOwner(); + N->setParent(Owner); + MDSymbolTable &ST = Owner->getMDSymbolTable(); + ST.insert(N->getName(), N); +} + +void ilist_traits<NamedMDNode>::removeNodeFromList(NamedMDNode *N) { + N->setParent(0); + Module *Owner = getListOwner(); + MDSymbolTable &ST = Owner->getMDSymbolTable(); + ST.remove(N->getName()); +} +} + +static SmallVector<WeakVH, 4> &getNMDOps(void *Operands) { + return *(SmallVector<WeakVH, 4>*)Operands; } NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N, - MetadataBase *const *MDs, + MDNode *const *MDs, unsigned NumMDs, Module *ParentModule) - : MetadataBase(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) { + : Value(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) { setName(N); - - Operands = new SmallVector<TrackingVH<MetadataBase>, 4>(); - - SmallVector<TrackingVH<MetadataBase>, 4> &Node = getNMDOps(Operands); + Operands = new SmallVector<WeakVH, 4>(); + + SmallVector<WeakVH, 4> &Node = getNMDOps(Operands); for (unsigned i = 0; i != NumMDs; ++i) - Node.push_back(TrackingVH<MetadataBase>(MDs[i])); + Node.push_back(WeakVH(MDs[i])); if (ParentModule) ParentModule->getNamedMDList().push_back(this); @@ -232,9 +344,9 @@ NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N, NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) { assert(NMD && "Invalid source NamedMDNode!"); - SmallVector<MetadataBase *, 4> Elems; + SmallVector<MDNode *, 4> Elems; Elems.reserve(NMD->getNumOperands()); - + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) Elems.push_back(NMD->getOperand(i)); return new NamedMDNode(NMD->getContext(), NMD->getName().data(), @@ -252,14 +364,14 @@ unsigned NamedMDNode::getNumOperands() const { } /// getOperand - Return specified operand. -MetadataBase *NamedMDNode::getOperand(unsigned i) const { +MDNode *NamedMDNode::getOperand(unsigned i) const { assert(i < getNumOperands() && "Invalid Operand number!"); - return getNMDOps(Operands)[i]; + return dyn_cast_or_null<MDNode>(getNMDOps(Operands)[i]); } /// addOperand - Add metadata Operand. -void NamedMDNode::addOperand(MetadataBase *M) { - getNMDOps(Operands).push_back(TrackingVH<MetadataBase>(M)); +void NamedMDNode::addOperand(MDNode *M) { + getNMDOps(Operands).push_back(WeakVH(M)); } /// eraseFromParent - Drop all references and remove the node from parent @@ -273,6 +385,26 @@ void NamedMDNode::dropAllReferences() { getNMDOps(Operands).clear(); } +/// setName - Set the name of this named metadata. 
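Note: NamedMDNode operands above move from TrackingVH<MetadataBase> to WeakVH over MDNode. A WeakVH nulls itself when its value is destroyed, which is why getOperand now goes through dyn_cast_or_null: a slot can legitimately read back as null once the referenced node dies. A hypothetical illustration, not taken from the patch:

    #include "llvm/Module.h"
    #include "llvm/Metadata.h"
    using namespace llvm;

    // Hypothetical: attach a node and observe the WeakVH behavior.
    static void attachExample(Module &M, MDNode *N) {
      NamedMDNode *NMD = M.getOrInsertNamedMetadata("example");
      NMD->addOperand(N); // stored as a WeakVH
      // If N is deleted later, the slot nulls out and getOperand(0)
      // returns 0 via dyn_cast_or_null instead of dangling.
    }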
+void NamedMDNode::setName(const Twine &NewName) { + assert (!NewName.isTriviallyEmpty() && "Invalid named metadata name!"); + + SmallString<256> NameData; + StringRef NameRef = NewName.toStringRef(NameData); + + // Name isn't changing? + if (getName() == NameRef) + return; + + Name = NameRef.str(); + if (Parent) + Parent->getMDSymbolTable().insert(NameRef, this); +} + +/// getName - Return a constant reference to this named metadata's name. +StringRef NamedMDNode::getName() const { + return StringRef(Name); +} //===----------------------------------------------------------------------===// // LLVMContext MDKind naming implementation. @@ -299,9 +431,9 @@ static bool isValidName(StringRef MDName) { /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. unsigned LLVMContext::getMDKindID(StringRef Name) const { assert(isValidName(Name) && "Invalid MDNode name"); - + unsigned &Entry = pImpl->CustomMDKindNames[Name]; - + // If this is new, assign it its ID. if (Entry == 0) Entry = pImpl->CustomMDKindNames.size(); return Entry; @@ -313,7 +445,7 @@ void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const { Names.resize(pImpl->CustomMDKindNames.size()+1); Names[0] = ""; for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(), - E = pImpl->CustomMDKindNames.end(); I != E; ++I) + E = pImpl->CustomMDKindNames.end(); I != E; ++I) // MD Handlers are numbered from 1. Names[I->second] = I->first(); } @@ -336,7 +468,7 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const { /// Node is null. void Instruction::setMetadata(unsigned KindID, MDNode *Node) { if (Node == 0 && !hasMetadata()) return; - + // Handle the case when we're adding/updating metadata on an instruction. if (Node) { LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; @@ -351,24 +483,24 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { return; } } - + // No replacement, just add it to the list. Info.push_back(std::make_pair(KindID, Node)); return; } - + // Otherwise, we're removing metadata from an instruction. assert(hasMetadata() && getContext().pImpl->MetadataStore.count(this) && "HasMetadata bit out of date!"); LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; - + // Common case is removing the only entry. if (Info.size() == 1 && Info[0].first == KindID) { getContext().pImpl->MetadataStore.erase(this); setHasMetadata(false); return; } - + // Handle replacement of an existing value. for (unsigned i = 0, e = Info.size(); i != e; ++i) if (Info[i].first == KindID) { @@ -383,7 +515,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { MDNode *Instruction::getMetadataImpl(unsigned KindID) const { LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; assert(hasMetadata() && !Info.empty() && "Shouldn't have called this"); - + for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end(); I != E; ++I) if (I->first == KindID) @@ -398,10 +530,10 @@ void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned, const LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore.find(this)->second; assert(!Info.empty() && "Shouldn't have called this"); - + Result.clear(); Result.append(Info.begin(), Info.end()); - + // Sort the resulting array so it is stable. 
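Note: the instruction-metadata plumbing above keys custom metadata by a small integer kind ID interned per context (getMDKindID), and setMetadata with a null node removes the entry. An illustrative attach/read/detach cycle; the kind string and helper name are made up for the example:

    #include "llvm/Instruction.h"
    #include "llvm/Metadata.h"
    using namespace llvm;

    static void tagInstruction(Instruction *I, MDNode *N) {
      unsigned KindID = I->getContext().getMDKindID("my.kind");
      I->setMetadata(KindID, N);            // attach or replace
      if (MDNode *Found = I->getMetadata(KindID))
        (void)Found;                        // read it back
      I->setMetadata(KindID, 0);            // detach again
    }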
if (Result.size() > 1) array_pod_sort(Result.begin(), Result.end()); diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index a7f503b..503e708 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -59,6 +59,7 @@ Module::Module(StringRef MID, LLVMContext& C) : Context(C), ModuleID(MID), DataLayout("") { ValSymTab = new ValueSymbolTable(); TypeSymTab = new TypeSymbolTable(); + NamedMDSymTab = new MDSymbolTable(); } Module::~Module() { @@ -70,15 +71,17 @@ Module::~Module() { NamedMDList.clear(); delete ValSymTab; delete TypeSymTab; + delete NamedMDSymTab; } /// Target endian information... Module::Endianness Module::getEndianness() const { - std::string temp = DataLayout; + StringRef temp = DataLayout; Module::Endianness ret = AnyEndianness; while (!temp.empty()) { - std::string token = getToken(temp, "-"); + StringRef token = DataLayout; + tie(token, temp) = getToken(DataLayout, "-"); if (token[0] == 'e') { ret = LittleEndian; @@ -92,15 +95,17 @@ Module::Endianness Module::getEndianness() const { /// Target Pointer Size information... Module::PointerSize Module::getPointerSize() const { - std::string temp = DataLayout; + StringRef temp = DataLayout; Module::PointerSize ret = AnyPointerSize; while (!temp.empty()) { - std::string token = getToken(temp, "-"); - char signal = getToken(token, ":")[0]; + StringRef token, signalToken; + tie(token, temp) = getToken(temp, "-"); + tie(signalToken, token) = getToken(token, ":"); - if (signal == 'p') { - int size = atoi(getToken(token, ":").c_str()); + if (signalToken[0] == 'p') { + int size = 0; + getToken(token, ":").first.getAsInteger(10, size); if (size == 32) ret = Pointer32; else if (size == 64) @@ -307,15 +312,14 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const { /// specified name. This method returns null if a NamedMDNode with the //// specified name is not found. NamedMDNode *Module::getNamedMetadata(StringRef Name) const { - return dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name)); + return NamedMDSymTab->lookup(Name); } /// getOrInsertNamedMetadata - Return the first named MDNode in the module /// with the specified name. This method returns a new NamedMDNode if a /// NamedMDNode with the specified name is not found. NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) { - NamedMDNode *NMD = - dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name)); + NamedMDNode *NMD = NamedMDSymTab->lookup(Name); if (!NMD) NMD = NamedMDNode::Create(getContext(), Name, NULL, 0, this); return NMD; diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 6bea7a8..39da8fb 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -19,6 +19,7 @@ #include "llvm/ModuleProvider.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Atomic.h" @@ -51,7 +52,7 @@ bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const { // dumpPassStructure - Implement the -debug-passes=Structure option void Pass::dumpPassStructure(unsigned Offset) { - errs().indent(Offset*2) << getPassName() << "\n"; + dbgs().indent(Offset*2) << getPassName() << "\n"; } /// getPassName - Return a nice clean name for a pass. 
This usually @@ -95,7 +96,7 @@ void Pass::print(raw_ostream &O,const Module*) const { // dump - call print(cerr); void Pass::dump() const { - print(errs(), 0); + print(dbgs(), 0); } //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index d688385..b37b2ae 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -15,6 +15,7 @@ #include "llvm/PassManagers.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Module.h" #include "llvm/ModuleProvider.h" @@ -132,7 +133,7 @@ public: // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::errs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n"; + llvm::dbgs() << std::string(Offset*2, ' ') << "BasicBlockPass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { BasicBlockPass *BP = getContainedPass(Index); BP->dumpPassStructure(Offset + 1); @@ -272,7 +273,7 @@ public: // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::errs() << std::string(Offset*2, ' ') << "ModulePass Manager\n"; + llvm::dbgs() << std::string(Offset*2, ' ') << "ModulePass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { ModulePass *MP = getContainedPass(Index); MP->dumpPassStructure(Offset + 1); @@ -595,11 +596,11 @@ void PMTopLevelManager::dumpArguments() const { if (PassDebugging < Arguments) return; - errs() << "Pass Arguments: "; + dbgs() << "Pass Arguments: "; for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(), E = PassManagers.end(); I != E; ++I) (*I)->dumpPassArguments(); - errs() << "\n"; + dbgs() << "\n"; } void PMTopLevelManager::initializeAllAnalysisInfo() { @@ -718,8 +719,8 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { // Remove this analysis if (PassDebugging >= Details) { Pass *S = Info->second; - errs() << " -- '" << P->getPassName() << "' is not preserving '"; - errs() << S->getPassName() << "'\n"; + dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; + dbgs() << S->getPassName() << "'\n"; } AvailableAnalysis.erase(Info); } @@ -742,8 +743,8 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { // Remove this analysis if (PassDebugging >= Details) { Pass *S = Info->second; - errs() << " -- '" << P->getPassName() << "' is not preserving '"; - errs() << S->getPassName() << "'\n"; + dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; + dbgs() << S->getPassName() << "'\n"; } InheritedAnalysis[Index]->erase(Info); } @@ -764,9 +765,9 @@ void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg, TPM->collectLastUses(DeadPasses, P); if (PassDebugging >= Details && !DeadPasses.empty()) { - errs() << " -*- '" << P->getPassName(); - errs() << "' is the last user of following pass instances."; - errs() << " Free these instances\n"; + dbgs() << " -*- '" << P->getPassName(); + dbgs() << "' is the last user of following pass instances."; + dbgs() << " Free these instances\n"; } for (SmallVector<Pass *, 12>::iterator I = DeadPasses.begin(), @@ -959,7 +960,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{ for (SmallVector<Pass *, 12>::iterator I = LUses.begin(), E = LUses.end(); I != E; ++I) { - llvm::errs() << "--" << std::string(Offset*2, ' '); + llvm::dbgs() << "--" << std::string(Offset*2, ' '); (*I)->dumpPassStructure(0); } 
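Note: the blanket errs() to dbgs() conversion running through these pass-manager hunks routes diagnostics to the dedicated debug stream, which behaves like errs() in release builds but cooperates with -debug filtering in asserts builds. The usual pairing with the DEBUG macro, as a sketch with an illustrative debug type:

    #define DEBUG_TYPE "passmgr" // illustrative
    #include "llvm/Pass.h"
    #include "llvm/Support/Debug.h"
    using namespace llvm;

    static void traceExecution(const Pass *P) {
      // Compiled out unless asserts are enabled; printed only under
      // -debug or -debug-only=passmgr.
      DEBUG(dbgs() << "Executing pass: " << P->getPassName() << "\n");
    }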
} @@ -972,7 +973,7 @@ void PMDataManager::dumpPassArguments() const { else if (const PassInfo *PI = (*I)->getPassInfo()) if (!PI->isAnalysisGroup()) - errs() << " -" << PI->getPassArgument(); + dbgs() << " -" << PI->getPassArgument(); } } @@ -981,35 +982,35 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1, StringRef Msg) { if (PassDebugging < Executions) return; - errs() << (void*)this << std::string(getDepth()*2+1, ' '); + dbgs() << (void*)this << std::string(getDepth()*2+1, ' '); switch (S1) { case EXECUTION_MSG: - errs() << "Executing Pass '" << P->getPassName(); + dbgs() << "Executing Pass '" << P->getPassName(); break; case MODIFICATION_MSG: - errs() << "Made Modification '" << P->getPassName(); + dbgs() << "Made Modification '" << P->getPassName(); break; case FREEING_MSG: - errs() << " Freeing Pass '" << P->getPassName(); + dbgs() << " Freeing Pass '" << P->getPassName(); break; default: break; } switch (S2) { case ON_BASICBLOCK_MSG: - errs() << "' on BasicBlock '" << Msg << "'...\n"; + dbgs() << "' on BasicBlock '" << Msg << "'...\n"; break; case ON_FUNCTION_MSG: - errs() << "' on Function '" << Msg << "'...\n"; + dbgs() << "' on Function '" << Msg << "'...\n"; break; case ON_MODULE_MSG: - errs() << "' on Module '" << Msg << "'...\n"; + dbgs() << "' on Module '" << Msg << "'...\n"; break; case ON_LOOP_MSG: - errs() << "' on Loop '" << Msg << "'...\n"; + dbgs() << "' on Loop '" << Msg << "'...\n"; break; case ON_CG_MSG: - errs() << "' on Call Graph Nodes '" << Msg << "'...\n"; + dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n"; break; default: break; @@ -1039,12 +1040,12 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, assert(PassDebugging >= Details); if (Set.empty()) return; - errs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; + dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; for (unsigned i = 0; i != Set.size(); ++i) { - if (i) errs() << ','; - errs() << ' ' << Set[i]->getPassName(); + if (i) dbgs() << ','; + dbgs() << ' ' << Set[i]->getPassName(); } - errs() << '\n'; + dbgs() << '\n'; } /// Add RequiredPass into list of lower level passes required by pass P. @@ -1067,8 +1068,8 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { // checks whether any lower level manager will be able to provide this // analysis info on demand or not. 
#ifndef NDEBUG - errs() << "Unable to schedule '" << RequiredPass->getPassName(); - errs() << "' required by '" << P->getPassName() << "'\n"; + dbgs() << "Unable to schedule '" << RequiredPass->getPassName(); + dbgs() << "' required by '" << P->getPassName() << "'\n"; #endif llvm_unreachable("Unable to schedule pass"); } @@ -1300,7 +1301,7 @@ bool FunctionPassManagerImpl::run(Function &F) { char FPPassManager::ID = 0; /// Print passes managed by this manager void FPPassManager::dumpPassStructure(unsigned Offset) { - llvm::errs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n"; + llvm::dbgs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { FunctionPass *FP = getContainedPass(Index); FP->dumpPassStructure(Offset + 1); @@ -1698,19 +1699,19 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) { return wrap(new FunctionPassManager(unwrap(P))); } -int LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { +LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { return unwrap<PassManager>(PM)->run(*unwrap(M)); } -int LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) { +LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) { return unwrap<FunctionPassManager>(FPM)->doInitialization(); } -int LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) { +LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) { return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F)); } -int LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) { +LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) { return unwrap<FunctionPassManager>(FPM)->doFinalization(); } diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp index 3d4f19d..f0f6e7a 100644 --- a/lib/VMCore/PrintModulePass.cpp +++ b/lib/VMCore/PrintModulePass.cpp @@ -16,6 +16,7 @@ #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -26,7 +27,7 @@ namespace { bool DeleteStream; // Delete the ostream in our dtor? public: static char ID; - PrintModulePass() : ModulePass(&ID), Out(&errs()), + PrintModulePass() : ModulePass(&ID), Out(&dbgs()), DeleteStream(false) {} PrintModulePass(raw_ostream *o, bool DS) : ModulePass(&ID), Out(o), DeleteStream(DS) {} @@ -51,7 +52,7 @@ namespace { bool DeleteStream; // Delete the ostream in our dtor? public: static char ID; - PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&errs()), + PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&dbgs()), DeleteStream(false) {} PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS) : FunctionPass(&ID), Banner(B), Out(o), DeleteStream(DS) {} diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index fd46aa1..044de4f 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -124,6 +124,11 @@ const Type *Type::getScalarType() const { return this; } +/// isInteger - Return true if this is an IntegerType of the specified width. +bool Type::isInteger(unsigned Bitwidth) const { + return isInteger() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; +} + /// isIntOrIntVector - Return true if this is an integer type or a vector of /// integer types. /// @@ -280,7 +285,7 @@ std::string Type::getDescription() const { bool StructType::indexValid(const Value *V) const { // Structure indexes require 32-bit integer constants. 
- if (V->getType() == Type::getInt32Ty(V->getContext())) + if (V->getType()->isInteger(32)) if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) return indexValid(CU->getZExtValue()); return false; @@ -487,7 +492,7 @@ PointerType::PointerType(const Type *E, unsigned AddrSpace) OpaqueType::OpaqueType(LLVMContext &C) : DerivedType(C, OpaqueTyID) { setAbstract(true); #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *this << "\n"); + DEBUG(dbgs() << "Derived new type: " << *this << "\n"); #endif } @@ -782,7 +787,7 @@ const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) { pImpl->IntegerTypes.add(IVT, ITy); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *ITy << "\n"); + DEBUG(dbgs() << "Derived new type: " << *ITy << "\n"); #endif return ITy; } @@ -825,7 +830,7 @@ FunctionType *FunctionType::get(const Type *ReturnType, } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << FT << "\n"); + DEBUG(dbgs() << "Derived new type: " << FT << "\n"); #endif return FT; } @@ -846,7 +851,7 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) { pImpl->ArrayTypes.add(AVT, AT = new ArrayType(ElementType, NumElements)); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *AT << "\n"); + DEBUG(dbgs() << "Derived new type: " << *AT << "\n"); #endif return AT; } @@ -870,7 +875,7 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { pImpl->VectorTypes.add(PVT, PT = new VectorType(ElementType, NumElements)); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *PT << "\n"); + DEBUG(dbgs() << "Derived new type: " << *PT << "\n"); #endif return PT; } @@ -902,7 +907,7 @@ StructType *StructType::get(LLVMContext &Context, pImpl->StructTypes.add(STV, ST); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *ST << "\n"); + DEBUG(dbgs() << "Derived new type: " << *ST << "\n"); #endif return ST; } @@ -946,7 +951,7 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) { pImpl->PointerTypes.add(PVT, PT = new PointerType(ValueType, AddressSpace)); } #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "Derived new type: " << *PT << "\n"); + DEBUG(dbgs() << "Derived new type: " << *PT << "\n"); #endif return PT; } @@ -1009,13 +1014,13 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const { AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i); #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << " remAbstractTypeUser[" << (void*)this << ", " + DEBUG(dbgs() << " remAbstractTypeUser[" << (void*)this << ", " << *this << "][" << i << "] User = " << U << "\n"); #endif if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "DELETEing unused abstract type: <" << *this + DEBUG(dbgs() << "DELETEing unused abstract type: <" << *this << ">[" << (void*)this << "]" << "\n"); #endif @@ -1041,7 +1046,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) { pImpl->AbstractTypeDescriptions.clear(); #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "REFINING abstract type [" << (void*)this << " " + DEBUG(dbgs() << "REFINING abstract type [" << (void*)this << " " << *this << "] to [" << (void*)NewType << " " << *NewType << "]!\n"); #endif @@ -1078,7 +1083,7 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) { unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize; #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << " REFINING user " << OldSize-1 << "[" << (void*)User + DEBUG(dbgs() 
<< " REFINING user " << OldSize-1 << "[" << (void*)User << "] of abstract type [" << (void*)this << " " << *this << "] to [" << (void*)NewTy.get() << " " << *NewTy << "]!\n"); @@ -1109,7 +1114,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) { // void DerivedType::notifyUsesThatTypeBecameConcrete() { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n"); + DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n"); #endif unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize; diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp index 0d0cdf5..b4daf0f 100644 --- a/lib/VMCore/TypeSymbolTable.cpp +++ b/lib/VMCore/TypeSymbolTable.cpp @@ -15,6 +15,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -58,7 +59,7 @@ Type* TypeSymbolTable::remove(iterator Entry) { #if DEBUG_SYMBOL_TABLE dump(); - errs() << " Removing Value: " << Result->getDescription() << "\n"; + dbgs() << " Removing Value: " << Result->getDescription() << "\n"; #endif tmap.erase(Entry); @@ -67,7 +68,7 @@ Type* TypeSymbolTable::remove(iterator Entry) { // list... if (Result->isAbstract()) { #if DEBUG_ABSTYPE - errs() << "Removing abstract type from symtab" + dbgs() << "Removing abstract type from symtab" << Result->getDescription() << "\n"; #endif @@ -87,7 +88,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) { #if DEBUG_SYMBOL_TABLE dump(); - errs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n"; + dbgs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n"; #endif } else { // If there is a name conflict... @@ -99,7 +100,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) { #if DEBUG_SYMBOL_TABLE dump(); - errs() << " Inserting type: " << UniqueName << ": " + dbgs() << " Inserting type: " << UniqueName << ": " << T->getDescription() << "\n"; #endif @@ -111,7 +112,7 @@ void TypeSymbolTable::insert(StringRef Name, const Type* T) { if (T->isAbstract()) { cast<DerivedType>(T)->addAbstractTypeUser(this); #if DEBUG_ABSTYPE - errs() << "Added abstract type to ST: " << T->getDescription() << "\n"; + dbgs() << "Added abstract type to ST: " << T->getDescription() << "\n"; #endif } } @@ -127,14 +128,14 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType, for (iterator I = begin(), E = end(); I != E; ++I) { if (I->second == (Type*)OldType) { // FIXME when Types aren't const. 
#if DEBUG_ABSTYPE - errs() << "Removing type " << OldType->getDescription() << "\n"; + dbgs() << "Removing type " << OldType->getDescription() << "\n"; #endif OldType->removeAbstractTypeUser(this); I->second = (Type*)NewType; // TODO FIXME when types aren't const if (NewType->isAbstract()) { #if DEBUG_ABSTYPE - errs() << "Added type " << NewType->getDescription() << "\n"; + dbgs() << "Added type " << NewType->getDescription() << "\n"; #endif cast<DerivedType>(NewType)->addAbstractTypeUser(this); } @@ -154,13 +155,13 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) { } static void DumpTypes(const std::pair<const std::string, const Type*>& T ) { - errs() << " '" << T.first << "' = "; + dbgs() << " '" << T.first << "' = "; T.second->dump(); - errs() << "\n"; + dbgs() << "\n"; } void TypeSymbolTable::dump() const { - errs() << "TypeSymbolPlane: "; + dbgs() << "TypeSymbolPlane: "; for_each(tmap.begin(), tmap.end(), DumpTypes); } diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h index e7950bd..93a801b 100644 --- a/lib/VMCore/TypesContext.h +++ b/lib/VMCore/TypesContext.h @@ -302,7 +302,7 @@ public: void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType, const Type *NewType) { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType + DEBUG(dbgs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType << "], " << (void*)NewType << " [" << *NewType << "])\n"); #endif @@ -408,11 +408,11 @@ public: void print(const char *Arg) const { #ifdef DEBUG_MERGE_TYPES - DEBUG(errs() << "TypeMap<>::" << Arg << " table contents:\n"); + DEBUG(dbgs() << "TypeMap<>::" << Arg << " table contents:\n"); unsigned i = 0; for (typename std::map<ValType, PATypeHolder>::const_iterator I = Map.begin(), E = Map.end(); I != E; ++I) - DEBUG(errs() << " " << (++i) << ". " << (void*)I->second.get() << " " + DEBUG(dbgs() << " " << (++i) << ". 
" << (void*)I->second.get() << " " << *I->second.get() << "\n"); #endif } diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index fe1219f..40679bf 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -44,14 +44,12 @@ Value::Value(const Type *ty, unsigned scid) SubclassOptionalData(0), SubclassData(0), VTy(checkType(ty)), UseList(0), Name(0) { if (isa<CallInst>(this) || isa<InvokeInst>(this)) - assert((VTy->isFirstClassType() || - VTy == Type::getVoidTy(ty->getContext()) || + assert((VTy->isFirstClassType() || VTy->isVoidTy() || isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) && "invalid CallInst type!"); else if (!isa<Constant>(this) && !isa<BasicBlock>(this)) - assert((VTy->isFirstClassType() || - VTy == Type::getVoidTy(ty->getContext()) || - isa<OpaqueType>(ty)) && + assert((VTy->isFirstClassType() || VTy->isVoidTy() || + isa<OpaqueType>(ty)) && "Cannot create non-first-class values except for constants!"); } @@ -68,9 +66,9 @@ Value::~Value() { // a <badref> // if (!use_empty()) { - errs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n"; + dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n"; for (use_iterator I = use_begin(), E = use_end(); I != E; ++I) - errs() << "Use still stuck around after Def is destroyed:" + dbgs() << "Use still stuck around after Def is destroyed:" << **I << "\n"; } #endif @@ -172,17 +170,13 @@ void Value::setName(const Twine &NewName) { return; SmallString<256> NameData; - NewName.toVector(NameData); - - const char *NameStr = NameData.data(); - unsigned NameLen = NameData.size(); + StringRef NameRef = NewName.toStringRef(NameData); // Name isn't changing? - if (getName() == StringRef(NameStr, NameLen)) + if (getName() == NameRef) return; - assert(getType() != Type::getVoidTy(getContext()) && - "Cannot assign a name to void values!"); + assert(!getType()->isVoidTy() && "Cannot assign a name to void values!"); // Get the symbol table to update for this object. ValueSymbolTable *ST; @@ -190,7 +184,7 @@ void Value::setName(const Twine &NewName) { return; // Cannot set a name on this value (e.g. constant). if (!ST) { // No symbol table to update? Just do the change. - if (NameLen == 0) { + if (NameRef.empty()) { // Free the name for this value. Name->Destroy(); Name = 0; @@ -204,7 +198,7 @@ void Value::setName(const Twine &NewName) { // then reallocated. // Create the new name. - Name = ValueName::Create(NameStr, NameStr+NameLen); + Name = ValueName::Create(NameRef.begin(), NameRef.end()); Name->setValue(this); return; } @@ -217,12 +211,12 @@ void Value::setName(const Twine &NewName) { Name->Destroy(); Name = 0; - if (NameLen == 0) + if (NameRef.empty()) return; } // Name is changing to something new. - Name = ST->createValueName(StringRef(NameStr, NameLen), this); + Name = ST->createValueName(NameRef, this); } @@ -522,7 +516,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) { // All callbacks, weak references, and assertingVHs should be dropped by now. if (V->HasValueHandle) { #ifndef NDEBUG // Only in +Asserts mode... 
- errs() << "While deleting: " << *V->getType() << " %" << V->getNameStr() + dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr() << "\n"; if (pImpl->ValueHandles[V]->getKind() == Assert) llvm_unreachable("An asserting value handle still pointed to this" diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp index 9d39a50..d30a9d6 100644 --- a/lib/VMCore/ValueSymbolTable.cpp +++ b/lib/VMCore/ValueSymbolTable.cpp @@ -24,7 +24,7 @@ using namespace llvm; ValueSymbolTable::~ValueSymbolTable() { #ifndef NDEBUG // Only do this in -g mode... for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI) - errs() << "Value still in symbol table! Type = '" + dbgs() << "Value still in symbol table! Type = '" << VI->getValue()->getType()->getDescription() << "' Name = '" << VI->getKeyData() << "'\n"; assert(vmap.empty() && "Values remain in symbol table!"); @@ -38,7 +38,7 @@ void ValueSymbolTable::reinsertValue(Value* V) { // Try inserting the name, assuming it won't conflict. if (vmap.insert(V->Name)) { - //DEBUG(errs() << " Inserted value: " << V->Name << ": " << *V << "\n"); + //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n"); return; } @@ -62,14 +62,14 @@ void ValueSymbolTable::reinsertValue(Value* V) { // Newly inserted name. Success! NewName.setValue(V); V->Name = &NewName; - //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); + //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); return; } } } void ValueSymbolTable::removeValueName(ValueName *V) { - //DEBUG(errs() << " Removing Value: " << V->getKeyData() << "\n"); + //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n"); // Remove the value from the symbol table. vmap.remove(V); } @@ -82,7 +82,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { ValueName &Entry = vmap.GetOrCreateValue(Name); if (Entry.getValue() == 0) { Entry.setValue(V); - //DEBUG(errs() << " Inserted value: " << Entry.getKeyData() << ": " + //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": " // << *V << "\n"); return &Entry; } @@ -102,7 +102,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { if (NewName.getValue() == 0) { // Newly inserted name. Success! 
NewName.setValue(V); - //DEBUG(errs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); + //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); return &NewName; } } @@ -112,10 +112,12 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { // dump - print out the symbol table // void ValueSymbolTable::dump() const { - //DEBUG(errs() << "ValueSymbolTable:\n"); + //DEBUG(dbgs() << "ValueSymbolTable:\n"); for (const_iterator I = begin(), E = end(); I != E; ++I) { - //DEBUG(errs() << " '" << I->getKeyData() << "' = "); + //DEBUG(dbgs() << " '" << I->getKeyData() << "' = "); I->getValue()->dump(); - //DEBUG(errs() << "\n"); + //DEBUG(dbgs() << "\n"); } } + +MDSymbolTable::~MDSymbolTable() { } diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 30528bf..ec475e4 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -56,6 +56,7 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/InstVisitor.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -85,9 +86,9 @@ namespace { // Anonymous namespace for class for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { if (I->empty() || !I->back().isTerminator()) { - errs() << "Basic Block does not have terminator!\n"; - WriteAsOperand(errs(), I, true); - errs() << "\n"; + dbgs() << "Basic Block does not have terminator!\n"; + WriteAsOperand(dbgs(), I, true); + dbgs() << "\n"; Broken = true; } } @@ -262,12 +263,12 @@ namespace { default: llvm_unreachable("Unknown action"); case AbortProcessAction: MessagesStr << "compilation aborted!\n"; - errs() << MessagesStr.str(); + dbgs() << MessagesStr.str(); // Client should choose different reaction if abort is not desired abort(); case PrintMessageAction: MessagesStr << "verification continues.\n"; - errs() << MessagesStr.str(); + dbgs() << MessagesStr.str(); return false; case ReturnStatusAction: MessagesStr << "compilation terminated.\n"; @@ -1589,9 +1590,10 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { default: break; case Intrinsic::dbg_declare: // llvm.dbg.declare - if (Constant *C = dyn_cast<Constant>(CI.getOperand(1))) - Assert1(C && !isa<ConstantPointerNull>(C), - "invalid llvm.dbg.declare intrinsic call", &CI); + if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(1))) + if (Constant *C = dyn_cast<Constant>(MD->getOperand(0))) + Assert1(C && !isa<ConstantPointerNull>(C), + "invalid llvm.dbg.declare intrinsic call", &CI); break; case Intrinsic::memcpy: case Intrinsic::memmove: |
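Note: the final Verifier hunk looks through an MDNode wrapper before checking llvm.dbg.declare's address operand, matching the newer intrinsic form in which the address travels inside metadata. A sketch of producing that operand with the three-argument MDNode::get introduced above; names are illustrative:

    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    using namespace llvm;

    // Illustrative: wrap a variable's address for
    //   call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !desc)
    static MDNode *wrapAddress(LLVMContext &Context, Value *AddrAlloca) {
      Value *Elts[] = { AddrAlloca };
      return MDNode::get(Context, Elts, 1); // operand 0 is the address
    }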