diff options
Diffstat (limited to 'lib')
54 files changed, 1722 insertions, 1347 deletions
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 1f8053a..8288e96 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -194,11 +194,10 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Type *ITy = GetCompareTy(LHS); // icmp X, X -> true/false - if (LHS == RHS) + // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false + // because X could be 0. + if (LHS == RHS || isa<UndefValue>(RHS)) return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - if (isa<UndefValue>(RHS)) // X icmp undef -> undef - return UndefValue::get(ITy); // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value // addresses never equal each other! We already know that Op0 != Op1. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index f5f10c8..e27da96 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" @@ -137,6 +138,10 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && IP->getOperand(1) == RHS) return IP; @@ -505,6 +510,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. 
+ if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx) return IP; @@ -1258,8 +1267,19 @@ Value *SCEVExpander::expand(const SCEV *S) { L = L->getParentLoop()) if (S->isLoopInvariant(L)) { if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) + if (BasicBlock *Preheader = L->getLoopPreheader()) { InsertPt = Preheader->getTerminator(); + BasicBlock::iterator IP = InsertPt; + // Back past any debug info instructions. Sometimes we inserted + // something earlier before debug info but after any real instructions. + // This should behave the same as if debug info was not present. + while (IP != Preheader->begin()) { + --IP; + if (!isa<DbgInfoIntrinsic>(IP)) + break; + InsertPt = IP; + } + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 09344a3..92cbb7c 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -23,6 +23,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include <cstring> using namespace llvm; @@ -1436,3 +1437,131 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // The array isn't null terminated, but maybe this is a memcpy, not a strcpy. return true; } + +// These next two are very similar to the above, but also look through PHI +// nodes. +// TODO: See if we can integrate these two together. + +/// GetStringLengthH - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { + // Look through noop bitcast instructions. 
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + return GetStringLengthH(BCI->getOperand(0), PHIs); + + // If this is a PHI node, there are two cases: either we have already seen it + // or we haven't. + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (!PHIs.insert(PN)) + return ~0ULL; // already in the set. + + // If it was new, see if all the input strings are the same length. + uint64_t LenSoFar = ~0ULL; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + if (Len == 0) return 0; // Unknown length -> unknown. + + if (Len == ~0ULL) continue; + + if (Len != LenSoFar && LenSoFar != ~0ULL) + return 0; // Disagree -> unknown. + LenSoFar = Len; + } + + // Success, all agree. + return LenSoFar; + } + + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) + if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + if (Len1 == 0) return 0; + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + if (Len2 == 0) return 0; + if (Len1 == ~0ULL) return Len2; + if (Len2 == ~0ULL) return Len1; + if (Len1 != Len2) return 0; + return Len1; + } + + // If the value is not a GEP instruction nor a constant expression with a + // GEP instruction, then return unknown. + User *GEP = 0; + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { + GEP = GEPI; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() != Instruction::GetElementPtr) + return 0; + GEP = CE; + } else { + return 0; + } + + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return 0; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. 
+ if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { + if (!Idx->isZero()) + return 0; + } else + return 0; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return 0; + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasInitializer() || + GV->mayBeOverridden()) + return 0; + Constant *GlobalInit = GV->getInitializer(); + + // Handle the ConstantAggregateZero case, which is a degenerate case. The + // initializer is constant zero so the length of the string must be zero. + if (isa<ConstantAggregateZero>(GlobalInit)) + return 1; // Len = 0 offset by 1. + + // Must be a Constant Array + ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); + if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getNumElements(); + + // Traverse the constant array from StartIdx (derived above) which is + // the place the GEP refers to in the array. + for (unsigned i = StartIdx; i != NumElts; ++i) { + Constant *Elt = Array->getOperand(i); + ConstantInt *CI = dyn_cast<ConstantInt>(Elt); + if (!CI) // This array isn't suitable, non-int initializer. + return 0; + if (CI->isZero()) + return i-StartIdx+1; // We found end of string, success! + } + + return 0; // The array isn't null terminated, conservatively return 'unknown'. 
+} + +/// GetStringLength - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +uint64_t llvm::GetStringLength(Value *V) { + if (!V->getType()->isPointerTy()) return 0; + + SmallPtrSet<PHINode*, 32> PHIs; + uint64_t Len = GetStringLengthH(V, PHIs); + // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return + // an empty string as a length. + return Len == ~0ULL ? 1 : Len; +} diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 7ed651b..4288422 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -27,20 +27,14 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { return 0; } -#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR >= 4) -#include <ext/stdio_filebuf.h> - -int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { - raw_fd_ostream OS(FileHandle, false); +int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose, + int Unbuffered) { + raw_fd_ostream OS(FD, ShouldClose, Unbuffered); WriteBitcodeToFile(unwrap(M), OS); return 0; } -#else - int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { - return -1; // Not supported. + return LLVMWriteBitcodeToFD(M, FileHandle, true, false); } - -#endif diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index faf4d95..d94729a 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -334,7 +334,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, unsigned TailLen = 0; while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; - if (!I1->isIdenticalTo(I2) || + // Don't merge debugging pseudos. + if (I1->isDebugValue() || I2->isDebugValue() || + !I1->isIdenticalTo(I2) || // FIXME: This check is dubious. 
It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler @@ -412,6 +414,8 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { unsigned Time = 0; for (; I != E; ++I) { + if (I->isDebugValue()) + continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall()) Time += 10; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 056e2d5..7d3de89 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -119,6 +119,8 @@ void CriticalAntiDepBreaker::FinishBlock() { void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex) { + if (MI->isDebugValue()) + return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); // Any register which was defined within the previous scheduling region @@ -409,6 +411,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. 
If it is, set diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index fd442db..5e88865 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -67,7 +67,7 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); -static cl::opt<bool> EnableMachineCSE("machine-cse", cl::Hidden, +static cl::opt<bool> EnableMachineCSE("enable-machine-cse", cl::Hidden, cl::desc("Enable Machine CSE")); static cl::opt<cl::boolOrDefault> @@ -212,6 +212,12 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return false; // success! } +static void printNoVerify(PassManagerBase &PM, + const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); +} + static void printAndVerify(PassManagerBase &PM, const char *Banner, bool allowDoubleDefs = false) { @@ -320,10 +326,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { PM.add(createOptimizeExtsPass()); - if (EnableMachineCSE) - PM.add(createMachineCSEPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); + if (EnableMachineCSE) + PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); printAndVerify(PM, "After MachineLICM and MachineSinking", @@ -378,13 +384,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Branch folding must be run after regalloc and prolog/epilog insertion. if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printAndVerify(PM, "After BranchFolding"); + printNoVerify(PM, "After BranchFolding"); } // Tail duplication. 
if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { PM.add(createTailDuplicatePass(false)); - printAndVerify(PM, "After TailDuplicate"); + printNoVerify(PM, "After TailDuplicate"); } PM.add(createGCMachineCodeAnalysisPass()); @@ -394,11 +400,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { PM.add(createCodePlacementOptPass()); - printAndVerify(PM, "After CodePlacementOpt"); + printNoVerify(PM, "After CodePlacementOpt"); } if (addPreEmitPass(PM, OptLevel)) - printAndVerify(PM, "After PreEmit passes"); + printNoVerify(PM, "After PreEmit passes"); return false; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 68c8539..519990e 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -365,27 +365,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } } - if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { - if (LastPartDef) - // The last partial def kills the register. - LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, - true/*IsImp*/, true/*IsKill*/)); - else { - MachineOperand *MO = - LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); - bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; - // If the last reference is the last def, then it's not used at all. - // That is, unless we are currently processing the last reference itself. - LastRefOrPartRef->addRegisterDead(Reg, TRI, true); - if (NeedEC) { - // If we are adding a subreg def and the superreg def is marked early - // clobber, add an early clobber marker to the subreg def. - MO = LastRefOrPartRef->findRegisterDefOperand(Reg); - if (MO) - MO->setIsEarlyClobber(); - } - } - } else if (!PhysRegUse[Reg]) { + if (!PhysRegUse[Reg]) { // Partial uses. Mark register def dead and add implicit def of // sub-registers which are used. 
// EAX<dead> = op AL<imp-def> @@ -419,6 +399,26 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.erase(*SS); } + } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { + if (LastPartDef) + // The last partial def kills the register. + LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/, true/*IsKill*/)); + else { + MachineOperand *MO = + LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); + bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; + // If the last reference is the last def, then it's not used at all. + // That is, unless we are currently processing the last reference itself. + LastRefOrPartRef->addRegisterDead(Reg, TRI, true); + if (NeedEC) { + // If we are adding a subreg def and the superreg def is marked early + // clobber, add an early clobber marker to the subreg def. + MO = LastRefOrPartRef->findRegisterDefOperand(Reg); + if (MO) + MO->setIsEarlyClobber(); + } + } } else LastRefOrPartRef->addRegisterKilled(Reg, TRI, true); return true; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 023ace2..b376e3d 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" @@ -25,76 +26,16 @@ using namespace llvm; -namespace llvm { - template<> struct DenseMapInfo<MachineInstr*> { - static inline MachineInstr *getEmptyKey() { - return 0; - } - - static inline MachineInstr *getTombstoneKey() { - return reinterpret_cast<MachineInstr*>(-1); - } - - static unsigned getHashValue(const MachineInstr* const &MI) { - unsigned Hash = MI->getOpcode() * 37; - for (unsigned 
i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - uint64_t Key = (uint64_t)MO.getType() << 32; - switch (MO.getType()) { - default: break; - case MachineOperand::MO_Register: - if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; // Skip virtual register defs. - Key |= MO.getReg(); - break; - case MachineOperand::MO_Immediate: - Key |= MO.getImm(); - break; - case MachineOperand::MO_FrameIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_JumpTableIndex: - Key |= MO.getIndex(); - break; - case MachineOperand::MO_MachineBasicBlock: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); - break; - case MachineOperand::MO_GlobalAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); - break; - case MachineOperand::MO_BlockAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); - break; - } - Key += ~(Key << 32); - Key ^= (Key >> 22); - Key += ~(Key << 13); - Key ^= (Key >> 8); - Key += (Key << 3); - Key ^= (Key >> 15); - Key += ~(Key << 27); - Key ^= (Key >> 31); - Hash = (unsigned)Key + Hash * 37; - } - return Hash; - } - - static bool isEqual(const MachineInstr* const &LHS, - const MachineInstr* const &RHS) { - if (RHS == getEmptyKey() || RHS == getTombstoneKey() || - LHS == getEmptyKey() || LHS == getTombstoneKey()) - return LHS == RHS; - return LHS->isIdenticalTo(RHS, MachineInstr::IgnoreVRegDefs); - } - }; -} // end llvm namespace +STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumCSEs, "Number of common subexpression eliminated"); namespace { class MachineCSE : public MachineFunctionPass { const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; MachineDominatorTree *DT; - ScopedHashTable<MachineInstr*, unsigned> VNT; - unsigned CurrVN; + AliasAnalysis *AA; public: static char ID; // Pass identification MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {} @@ -104,12 +45,22 @@ 
namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); } private: + unsigned CurrVN; + ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; + SmallVector<MachineInstr*, 64> Exps; + bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E); + bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB); + bool isCSECandidate(MachineInstr *MI); bool ProcessBlock(MachineDomTreeNode *Node); }; } // end anonymous namespace @@ -125,27 +76,65 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, bool Changed = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse()) { - unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) - continue; - MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (DefMI->getParent() == MBB) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - TargetRegisterInfo::isVirtualRegister(SrcReg) && - !SrcSubIdx && !DstSubIdx) { - MO.setReg(SrcReg); - Changed = true; - } - } + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (!MRI->hasOneUse(Reg)) + // Only coalesce single use copies. This ensure the copy will be + // deleted. 
+ continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() != MBB) + continue; + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + TargetRegisterInfo::isVirtualRegister(SrcReg) && + !SrcSubIdx && !DstSubIdx) { + MO.setReg(SrcReg); + DefMI->eraseFromParent(); + ++NumCoalesces; + Changed = true; } } return Changed; } -static bool hasLivePhysRegDefUse(MachineInstr *MI) { +bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) { + unsigned LookAheadLeft = 5; + while (LookAheadLeft--) { + if (I == E) + // Reached end of block, register is obviously dead. + return true; + + if (I->isDebugValue()) + continue; + bool SeenDef = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (!TRI->regsOverlap(MO.getReg(), Reg)) + continue; + if (MO.isUse()) + return false; + SeenDef = true; + } + if (SeenDef) + // See a def of Reg (or an alias) before encountering any use, it's + // trivially dead. + return true; + ++I; + } + return false; +} + +bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){ + unsigned PhysDef = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) @@ -153,30 +142,69 @@ static bool hasLivePhysRegDefUse(MachineInstr *MI) { unsigned Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) && - !(MO.isDef() && MO.isDead())) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) + // Can't touch anything to read a physical register. + return true; + if (MO.isDead()) + // If the def is dead, it's ok. + continue; + // Ok, this is a physical register def that's not marked "dead". That's + // common since this pass is run before livevariables. 
We can scan + // forward a few instructions and check if it is obviously dead. + if (PhysDef) + // Multiple physical register defs. These are rare, forget about it. + return true; + PhysDef = Reg; + } + } + + if (PhysDef) { + MachineBasicBlock::iterator I = MI; I = llvm::next(I); + if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) return true; } return false; } +bool MachineCSE::isCSECandidate(MachineInstr *MI) { + // Ignore copies or instructions that read / write physical registers + // (except for dead defs of physical registers). + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) || + MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg()) + return false; + + // Ignore stuff that we obviously can't move. + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.hasUnmodeledSideEffects()) + return false; + + if (TID.mayLoad()) { + // Okay, this instruction does a load. As a refinement, we allow the target + // to decide whether the loaded value is actually a constant. If so, we can + // actually use it as a load. + if (!MI->isInvariantLoad(AA)) + // FIXME: we should be able to hoist loads with no other side effects if + // there are no other instructions which can change memory in this loop. + // This is a trivial form of alias analysis. 
+ return false; + } + return true; +} + bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { bool Changed = false; - ScopedHashTableScope<MachineInstr*, unsigned> VNTS(VNT); + ScopedHashTableScope<MachineInstr*, unsigned, + MachineInstrExpressionTrait> VNTS(VNT); MachineBasicBlock *MBB = Node->getBlock(); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; - bool SawStore = false; - if (!MI->isSafeToMove(TII, 0, SawStore)) - continue; - // Ignore copies or instructions that read / write physical registers - // (except for dead defs of physical registers). - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) - continue; - if (hasLivePhysRegDefUse(MI)) + ++I; + + if (!isCSECandidate(MI)) continue; bool FoundCSE = VNT.count(MI); @@ -185,11 +213,41 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { if (PerformTrivialCoalescing(MI, MBB)) FoundCSE = VNT.count(MI); } + // FIXME: commute commutable instructions? + + // If the instruction defines a physical register and the value *may* be + // used, then it's not safe to replace it with a common subexpression. + if (FoundCSE && hasLivePhysRegDefUse(MI, MBB)) + FoundCSE = false; + + if (!FoundCSE) { + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + continue; + } - if (FoundCSE) - DEBUG(dbgs() << "Found a common subexpression: " << *MI); - else - VNT.insert(MI, ++CurrVN); + // Found a common subexpression, eliminate it. 
+ unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + DEBUG(dbgs() << "Examining: " << *MI); + DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); + unsigned NumDefs = MI->getDesc().getNumDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned OldReg = MO.getReg(); + unsigned NewReg = CSMI->getOperand(i).getReg(); + if (OldReg == NewReg) + continue; + assert(TargetRegisterInfo::isVirtualRegister(OldReg) && + TargetRegisterInfo::isVirtualRegister(NewReg) && + "Do not CSE physical register defs!"); + MRI->replaceRegWith(OldReg, NewReg); + --NumDefs; + } + MI->eraseFromParent(); + ++NumCSEs; } // Recursively call ProcessBlock with childred. @@ -202,7 +260,9 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); return ProcessBlock(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index cba93f1..e23670d 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -704,24 +704,31 @@ void MachineInstr::addMemOperand(MachineFunction &MF, bool MachineInstr::isIdenticalTo(const MachineInstr *Other, MICheckType Check) const { - if (Other->getOpcode() != getOpcode() || - Other->getNumOperands() != getNumOperands()) + // If opcodes or number of operands are not the same then the two + // instructions are obviously not identical. + if (Other->getOpcode() != getOpcode() || + Other->getNumOperands() != getNumOperands()) + return false; + + // Check operands to make sure they match. 
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + const MachineOperand &OMO = Other->getOperand(i); + // Clients may or may not want to ignore defs when testing for equality. + // For example, machine CSE pass only cares about finding common + // subexpressions, so it's safe to ignore virtual register defs. + if (Check != CheckDefs && MO.isReg() && MO.isDef()) { + if (Check == IgnoreDefs) + continue; + // Check == IgnoreVRegDefs + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) + if (MO.getReg() != OMO.getReg()) + return false; + } else if (!MO.isIdenticalTo(OMO)) return false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - const MachineOperand &OMO = Other->getOperand(i); - if (Check != CheckDefs && MO.isReg() && MO.isDef()) { - if (Check == IgnoreDefs) - continue; - // Check == IgnoreVRegDefs - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || - TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) - if (MO.getReg() != OMO.getReg()) - return false; - } else if (!MO.isIdenticalTo(OMO)) - return false; - } - return true; + } + return true; } /// removeFromParent - This method unlinks 'this' from the containing basic @@ -1348,3 +1355,48 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsDef*/, true /*IsImp*/)); } + +unsigned +MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { + unsigned Hash = MI->getOpcode() * 37; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + uint64_t Key = (uint64_t)MO.getType() << 32; + switch (MO.getType()) { + default: break; + case MachineOperand::MO_Register: + if (MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. 
+ Key |= MO.getReg(); + break; + case MachineOperand::MO_Immediate: + Key |= MO.getImm(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + Key |= MO.getIndex(); + break; + case MachineOperand::MO_MachineBasicBlock: + Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + break; + case MachineOperand::MO_GlobalAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + break; + case MachineOperand::MO_BlockAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + break; + } + Key += ~(Key << 32); + Key ^= (Key >> 22); + Key += ~(Key << 13); + Key ^= (Key >> 8); + Key += (Key << 3); + Key ^= (Key >> 15); + Key += ~(Key << 27); + Key ^= (Key >> 31); + Hash = (unsigned)Key + Hash * 37; + } + return Hash; +} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index b31973e..d9ab677 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -116,6 +116,19 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { return 0; } +bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { + use_iterator UI = use_begin(RegNo); + if (UI == use_end()) + return false; + return ++UI == use_end(); +} + +bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { + use_nodbg_iterator UI = use_nodbg_begin(RegNo); + if (UI == use_nodbg_end()) + return false; + return ++UI == use_nodbg_end(); +} #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 9ba7d14..e47ba7c 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -72,8 +72,13 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); - for (MachineRegisterInfo::use_iterator I = 
RegInfo->use_begin(Reg), - E = RegInfo->use_end(); I != E; ++I) { + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (MachineRegisterInfo::use_nodbg_iterator I = + RegInfo->use_nodbg_begin(Reg), + E = RegInfo->use_nodbg_end(); I != E; ++I) { // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); @@ -135,7 +140,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { ProcessedBegin = I == MBB.begin(); if (!ProcessedBegin) --I; - + + if (MI->isDebugValue()) + continue; + if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index bdfd448..8bbe0a7 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -443,34 +443,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, return NMBB; } - -unsigned -PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) { - if (!MI || MI==getEmptyKey() || MI==getTombstoneKey()) - return DenseMapInfo<MachineInstr*>::getHashValue(MI); - unsigned hash = 0; - for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2) - hash = hash*37 + DenseMapInfo<BBVRegPair>:: - getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(), - MI->getOperand(ni).getReg())); - return hash; -} - -bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS, - const MachineInstr *RHS) { - const MachineInstr *EmptyKey = getEmptyKey(); - const MachineInstr *TombstoneKey = getTombstoneKey(); - if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey || - LHS==TombstoneKey || RHS==TombstoneKey) - return LHS==RHS; - - unsigned ne = LHS->getNumOperands(); - if (ne != RHS->getNumOperands()) - return 
false; - // Ignore operand 0, the defined register. - for (unsigned ni = 1; ni != ne; ni += 2) - if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() || - LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB()) - return false; - return true; -} diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index ff4aa20..7dedf03 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -102,15 +102,9 @@ namespace llvm { // Defs of PHI sources which are implicit_def. SmallPtrSet<MachineInstr*, 4> ImpDefs; - // Lowered PHI nodes may be reused. We provide special DenseMap traits to - // match PHI nodes with identical arguments. - struct PHINodeTraits : public DenseMapInfo<MachineInstr*> { - static unsigned getHashValue(const MachineInstr *PtrVal); - static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS); - }; - // Map reusable lowered PHI node -> incoming join register. - typedef DenseMap<MachineInstr*, unsigned, PHINodeTraits> LoweredPHIMap; + typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; LoweredPHIMap LoweredPHIs; }; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f43395f..424181c 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -460,6 +460,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; // Update liveness. Registers that are defed but not used in this // instruction are now dead. 
Mark register and all subregs as they diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 7fb3e6e..5e86e5a 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -18,19 +18,38 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" #include <map> using namespace llvm; -static ManagedStatic<PseudoSourceValue[4]> PSVs; +namespace { +struct PSVGlobalsTy { + // PseudoSourceValues are immutable so don't need locking. + const PseudoSourceValue PSVs[4]; + sys::Mutex Lock; // Guards FSValues, but not the values inside it. + std::map<int, const PseudoSourceValue *> FSValues; + + PSVGlobalsTy() : PSVs() {} + ~PSVGlobalsTy() { + for (std::map<int, const PseudoSourceValue *>::iterator + I = FSValues.begin(), E = FSValues.end(); I != E; ++I) { + delete I->second; + } + } +}; + +static ManagedStatic<PSVGlobalsTy> PSVGlobals; + +} // anonymous namespace const PseudoSourceValue *PseudoSourceValue::getStack() -{ return &(*PSVs)[0]; } +{ return &PSVGlobals->PSVs[0]; } const PseudoSourceValue *PseudoSourceValue::getGOT() -{ return &(*PSVs)[1]; } +{ return &PSVGlobals->PSVs[1]; } const PseudoSourceValue *PseudoSourceValue::getJumpTable() -{ return &(*PSVs)[2]; } +{ return &PSVGlobals->PSVs[2]; } const PseudoSourceValue *PseudoSourceValue::getConstantPool() -{ return &(*PSVs)[3]; } +{ return &PSVGlobals->PSVs[3]; } static const char *const PSVNames[] = { "Stack", @@ -48,13 +67,13 @@ PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : Subclass) {} void PseudoSourceValue::printCustom(raw_ostream &O) const { - O << PSVNames[this - *PSVs]; + O << PSVNames[this - PSVGlobals->PSVs]; } -static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues; - const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { - const PseudoSourceValue *&V = (*FSValues)[FI]; + PSVGlobalsTy &PG = *PSVGlobals; + 
sys::ScopedLock locked(PG.Lock); + const PseudoSourceValue *&V = PG.FSValues[FI]; if (!V) V = new FixedStackPseudoSourceValue(FI); return V; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e4ff44d..3be6b43 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1758,7 +1758,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getSizeInBits(); + unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { @@ -1872,9 +1872,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueSizeInBits(); + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getSizeInBits())) && + BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, @@ -1895,9 +1895,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
- unsigned BitWidth = N1.getValueSizeInBits(); + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getSizeInBits())) && + BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 02fe85d..625de11 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" +#include "SDDbgValue.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -497,6 +498,56 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } +/// EmitDbgValue - Generate any debug info that refers to this Node. Constant +/// dbg_value is not handled here. +void +InstrEmitter::EmitDbgValue(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + SDDbgValue *sd) { + if (!Node->getHasDebugValue()) + return; + if (!sd) + return; + unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap); + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + DebugLoc DL = sd->getDebugLoc(); + MachineInstr *MI; + if (VReg) { + MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug). + addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else { + // Insert an Undef so we can see what we dropped. + MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } + MBB->insert(InsertPos, MI); +} + +/// EmitDbgValue - Generate constant debug info. No SDNode is involved. 
+void +InstrEmitter::EmitDbgValue(SDDbgValue *sd) { + if (!sd) + return; + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + DebugLoc DL = sd->getDebugLoc(); + MachineInstr *MI; + Value *V = sd->getConst(); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()). + addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + MI = BuildMI(*MF, DL, II).addFPImm(CF).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else { + // Insert an Undef so we can see what we dropped. + MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } + MBB->insert(InsertPos, MI); +} + /// EmitNode - Generate machine code for a node and needed dependencies. /// void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 91817e4..4fe9f19 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -23,6 +23,7 @@ namespace llvm { class TargetInstrDesc; +class SDDbgValue; class InstrEmitter { MachineFunction *MF; @@ -97,6 +98,16 @@ public: /// MachineInstr. static unsigned CountOperands(SDNode *Node); + /// EmitDbgValue - Generate any debug info that refers to this Node. Constant + /// dbg_value is not handled here. + void EmitDbgValue(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + SDDbgValue* sd); + + + /// EmitDbgValue - Generate a constant DBG_VALUE. No node is involved. + void EmitDbgValue(SDDbgValue* sd); + /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c7ab34f..f498263 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2008,6 +2008,31 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + // TODO: Generalize this for use with other types. + if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + SDValue TwoP52 = + DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64); + SDValue TwoP84PlusTwoP52 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64); + SDValue TwoP84 = + DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64); + + SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); + SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, + DAG.getConstant(32, MVT::i64)); + SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); + SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); + return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); + } + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h new file mode 100644 index 0000000..9e15fc9 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SDDbgValue.h @@ 
-0,0 +1,67 @@ +//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDDbgValue class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDDBGVALUE_H +#define LLVM_CODEGEN_SDDBGVALUE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DebugLoc.h" + +namespace llvm { + +class MDNode; +class SDNode; +class Value; + +/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// Either Const or Node is nonzero, but not both. +/// We do not use SDValue here to avoid including its header. + +class SDDbgValue { + SDNode *Node; // valid for non-constants + unsigned ResNo; // valid for non-constants + Value *Const; // valid for constants + MDNode *mdPtr; + uint64_t Offset; + DebugLoc DL; +public: + // Constructor for non-constants. + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl) : + Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl) {} + + // Constructor for constants. + SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl) : Node(0), + ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl) {} + + // Returns the MDNode pointer. + MDNode *getMDPtr() { return mdPtr; } + + // Returns the SDNode* (valid for non-constants only). + SDNode *getSDNode() { assert (!Const); return Node; } + + // Returns the ResNo (valid for non-constants only). + unsigned getResNo() { assert (!Const); return ResNo; } + + // Returns the Value* for a constant (invalid for non-constants). + Value *getConst() { assert (!Node); return Const; } + + // Returns the offset. + uint64_t getOffset() { return Offset; } + + // Returns the DebugLoc. 
+ DebugLoc getDebugLoc() { return DL; } +}; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 002bc68..023e486 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4869,6 +4869,43 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, return NULL; } +namespace { + +/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node +/// pointed to by a use iterator is deleted, increment the use iterator +/// so that it doesn't dangle. +/// +/// This class also manages a "downlink" DAGUpdateListener, to forward +/// messages to ReplaceAllUsesWith's callers. +/// +class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { + SelectionDAG::DAGUpdateListener *DownLink; + SDNode::use_iterator &UI; + SDNode::use_iterator &UE; + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + // Increment the iterator as needed. + while (UI != UE && N == *UI) + ++UI; + + // Then forward the message. + if (DownLink) DownLink->NodeDeleted(N, E); + } + + virtual void NodeUpdated(SDNode *N) { + // Just forward the message. + if (DownLink) DownLink->NodeUpdated(N); + } + +public: + RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl, + SDNode::use_iterator &ui, + SDNode::use_iterator &ue) + : DownLink(dl), UI(ui), UE(ue) {} +}; + +} + /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. /// @@ -4889,6 +4926,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // is replaced by To, we don't want to replace of all its users with To // too. See PR3018 for more info. 
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -4907,7 +4945,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -4933,6 +4971,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -4951,7 +4990,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -4969,6 +5008,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -4988,7 +5028,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -5010,6 +5050,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // the ReplaceAllUsesWith above. 
SDNode::use_iterator UI = From.getNode()->use_begin(), UE = From.getNode()->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; bool UserRemovedFromCSEMaps = false; @@ -5045,7 +5086,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2e2020d..05f9f1f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -56,9 +56,12 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" #include <algorithm> using namespace llvm; +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -930,6 +933,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + ++NumFastIselFailures; ResetDebugLoc(SDB, FastIS); if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; @@ -954,6 +958,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { + ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; BI->dump(); @@ -983,6 +988,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Otherwise, give up on FastISel for the rest of the block. 
// For now, be a little lenient about non-branch terminators. if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; BI->dump(); @@ -1032,6 +1038,8 @@ SelectionDAGISel::FinishBasicBlock() { MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); + if (!BB->isSuccessor(PHI->getParent())) + continue; PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); @@ -1414,21 +1422,6 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, return false; } -/// isNonImmUse - Start searching from Root up the DAG to check is Def can -/// be reached. Return true if that's the case. However, ignore direct uses -/// by ImmedUse (which would be U in the example illustrated in -/// IsLegalToFold) and by Root (which can happen in the store case). -/// FIXME: to be really generic, we should allow direct use by any node -/// that is being folded. But realisticly since we only fold loads which -/// have one non-chain use, we only need to watch out for load/op/store -/// and load/op/cmp case where the root (store / cmp) may reach the load via -/// its chain operand. -static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, - bool IgnoreChains) { - SmallPtrSet<SDNode*, 16> Visited; - return findNonImmUse(Root, Def, ImmedUse, Root, Visited, IgnoreChains); -} - /// IsProfitableToFold - Returns true if it's profitable to fold the specific /// operand node N of U during instruction selection that starts at Root. bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, @@ -1485,6 +1478,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // Fold. But since Fold and FU are flagged together, this will create // a cycle in the scheduling graph. 
+ // If the node has flags, walk down the graph to the "lowest" node in the + // flagged set. EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Flag) { SDNode *FU = findFlagUse(Root); @@ -1492,9 +1487,17 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, break; Root = FU; VT = Root->getValueType(Root->getNumValues()-1); + + // If our query node has a flag result with a use, we've walked up it. If + // the user (which has already been selected) has a chain or indirectly uses + // the chain, our WalkChainUsers predicate will not consider it. Because of + // this, we cannot ignore chains in this predicate. + IgnoreChains = false; } + - return !isNonImmUse(Root, N.getNode(), U, IgnoreChains); + SmallPtrSet<SDNode*, 16> Visited; + return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { @@ -2249,11 +2252,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, N.getNode())) break; continue; - case OPC_CheckComplexPat: - if (!CheckComplexPattern(NodeToMatch, N, - MatcherTable[MatcherIndex++], RecordedNodes)) + case OPC_CheckComplexPat: { + unsigned CPNum = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, + RecordedNodes)) break; continue; + } case OPC_CheckOpcode: if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; continue; @@ -2711,29 +2718,26 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, void SelectionDAGISel::CannotYetSelect(SDNode *N) { - if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN || - N->getOpcode() == ISD::INTRINSIC_WO_CHAIN || - N->getOpcode() == ISD::INTRINSIC_VOID) - return CannotYetSelectIntrinsic(N); - std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot yet select: "; - N->printrFull(Msg, CurDAG); 
+ + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && + N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && + N->getOpcode() != ISD::INTRINSIC_VOID) { + N->printrFull(Msg, CurDAG); + } else { + bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; + unsigned iid = + cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + if (iid < Intrinsic::num_intrinsics) + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) + Msg << "target intrinsic %" << TII->getName(iid); + else + Msg << "unknown intrinsic #" << iid; + } llvm_report_error(Msg.str()); } -void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) { - dbgs() << "Cannot yet select: "; - unsigned iid = - cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() == - MVT::Other))->getZExtValue(); - if (iid < Intrinsic::num_intrinsics) - llvm_report_error("Cannot yet select: intrinsic %" + - Intrinsic::getName((Intrinsic::ID)iid)); - else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo()) - llvm_report_error(Twine("Cannot yet select: target intrinsic %") + - tii->getName(iid)); -} - char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 8d4d1b2..059e8d6 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -44,7 +44,6 @@ namespace { const Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; - Constant *ResumeFn; Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; Constant *LSDAAddrFn; @@ -67,8 +66,8 @@ namespace { } private: - void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, - Value *CallSite, + void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); + void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch); void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); bool insertSjLjEHSupport(Function &F); @@ -107,11 +106,6 
@@ bool SjLjEHPass::doInitialization(Module &M) { Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy), (Type *)0); - ResumeFn = - M.getOrInsertFunction("_Unwind_SjLj_Resume", - Type::getVoidTy(M.getContext()), - VoidPtrTy, - (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); @@ -123,12 +117,22 @@ bool SjLjEHPass::doInitialization(Module &M) { return true; } +/// insertCallSiteStore - Insert a store of the call-site value to the +/// function context +void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, + Value *CallSite) { + ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), + Number); + // Insert a store of the call-site number + new StoreInst(CallSiteNoC, CallSite, true, I); // volatile +} + /// markInvokeCallSite - Insert code to mark the call_site for this invoke -void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, +void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch) { ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); + InvokeNo); // The runtime comes back to the dispatcher with the call_site - 1 in // the context. Odd, but there it is. ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), @@ -145,8 +149,11 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, } } - // Insert a store of the invoke num before the invoke - new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + // Insert the store of the call site value + insertCallSiteStore(II, InvokeNo, CallSite); + + // Record the call site value for the back end so it stays associated with + // the invoke. CallInst::Create(CallSiteFn, CallSiteNoC, "", II); // Add a switch case to our unwind block. 
@@ -272,8 +279,8 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { SmallVector<InvokeInst*,16> Invokes; // Look through the terminators of the basic blocks to find invokes, returns - // and unwinds - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + // and unwinds. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { // Remember all return instructions in case we insert an invoke into this // function. @@ -283,6 +290,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { Unwinds.push_back(UI); } + } // If we don't have any invokes or unwinds, there's nothing to do. if (Unwinds.empty() && Invokes.empty()) return false; @@ -478,24 +486,21 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { for (unsigned i = 0, e = Invokes.size(); i != e; ++i) markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - // The front end has likely added calls to _Unwind_Resume. We need - // to find those calls and mark the call_site as -1 immediately prior. - // resume is a noreturn function, so any block that has a call to it - // should end in an 'unreachable' instruction with the call immediately - // prior. That's how we'll search. - // ??? There's got to be a better way. this is fugly. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) { - BasicBlock::iterator I = BB->getTerminator(); - // Check the previous instruction and see if it's a resume call - if (I == BB->begin()) continue; - if (CallInst *CI = dyn_cast<CallInst>(--I)) { - if (CI->getCalledFunction() == ResumeFn) { - Value *NegativeOne = Constant::getAllOnesValue(Int32Ty); - new StoreInst(NegativeOne, CallSite, true, I); // volatile - } + // Mark call instructions that aren't nounwind as no-action + // (call_site == -1). 
Skip the entry block, as prior to then, no function + // context has been created for this function and any unexpected exceptions + // thrown will go directly to the caller's context, which is what we want + // anyway, so no need to do anything here. + for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore calls to the EH builtins (eh.selector, eh.exception) + Constant *Callee = CI->getCalledFunction(); + if (Callee != SelectorFn && Callee != ExceptionFn + && !CI->doesNotThrow()) + insertCallSiteStore(CI, -1, CallSite); } - } + } // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index ef6e129..3b3be5d 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -550,8 +550,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } // Exception Handling. - LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0, - SectionKind::getReadOnlyWithRel()); + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); EHFrameSection = getMachOSection("__TEXT", "__eh_frame", MCSectionMachO::S_COALESCED | @@ -652,7 +652,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. 
if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString()) { + (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage())) { if (TM.getTargetData()->getPreferredAlignment( cast<GlobalVariable>(GV)) < 32) { if (Kind.isMergeable1ByteCString()) @@ -779,7 +779,7 @@ unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { } unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { - return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + return DW_EH_PE_absptr; } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 0ba3843..c840b39 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -454,13 +454,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, const TargetInstrInfo *TII, bool &IsCopy, unsigned &DstReg, bool &IsDstPhys) { - MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg); - if (UI == MRI->use_nodbg_end()) - return 0; - MachineInstr &UseMI = *UI; - if (++UI != MRI->use_nodbg_end()) - // More than one use. + if (!MRI->hasOneNonDBGUse(Reg)) + // None or more than one use. 
return 0; + MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg); if (UseMI.getParent() != MBB) return 0; unsigned SrcReg; diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp index 7bcd30a..9d07811 100644 --- a/lib/CompilerDriver/Action.cpp +++ b/lib/CompilerDriver/Action.cpp @@ -15,6 +15,7 @@ #include "llvm/CompilerDriver/BuiltinOptions.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SystemUtils.h" #include "llvm/System/Program.h" #include "llvm/System/TimeValue.h" @@ -24,13 +25,23 @@ using namespace llvm; using namespace llvmc; +namespace llvmc { + +extern int Main(int argc, char** argv); +extern const char* ProgramName; + +} + namespace { int ExecuteProgram(const std::string& name, const StrVector& args) { sys::Path prog = sys::Program::FindProgramByName(name); - if (prog.isEmpty()) - throw std::runtime_error("Can't find program '" + name + "'"); + if (prog.isEmpty()) { + prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main); + if (prog.isEmpty()) + throw std::runtime_error("Can't find program '" + name + "'"); + } if (!prog.canExecute()) throw std::runtime_error("Program '" + name + "' is not executable."); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 57c4375..783ebb4 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -156,53 +156,18 @@ namespace { // was no stub. This function uses the call-site->function map to find a // relevant function, but asserts that only stubs and not other call sites // will be passed in. - Function *EraseStub(const MutexGuard &locked, void *Stub) { - CallSiteToFunctionMapTy::iterator C2F_I = - CallSiteToFunctionMap.find(Stub); - if (C2F_I == CallSiteToFunctionMap.end()) { - // Not a stub. 
- return NULL; - } - - Function *const F = C2F_I->second; -#ifndef NDEBUG - void *RealStub = FunctionToLazyStubMap.lookup(F); - assert(RealStub == Stub && - "Call-site that wasn't a stub pass in to EraseStub"); -#endif - FunctionToLazyStubMap.erase(F); - CallSiteToFunctionMap.erase(C2F_I); - - // Remove the stub from the function->call-sites map, and remove the whole - // entry from the map if that was the last call site. - FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); - assert(F2C_I != FunctionToCallSitesMap.end() && - "FunctionToCallSitesMap broken"); - bool Erased = F2C_I->second.erase(Stub); - (void)Erased; - assert(Erased && "FunctionToCallSitesMap broken"); - if (F2C_I->second.empty()) - FunctionToCallSitesMap.erase(F2C_I); - - return F; - } + Function *EraseStub(const MutexGuard &locked, void *Stub); - void EraseAllCallSites(const MutexGuard &locked, Function *F) { + void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) { assert(locked.holds(TheJIT->lock)); - EraseAllCallSitesPrelocked(F); - } - void EraseAllCallSitesPrelocked(Function *F) { - FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); - if (F2C == FunctionToCallSitesMap.end()) - return; - for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(), - E = F2C->second.end(); I != E; ++I) { - bool Erased = CallSiteToFunctionMap.erase(*I); - (void)Erased; - assert(Erased && "Missing call site->function mapping"); - } - FunctionToCallSitesMap.erase(F2C); + EraseAllCallSitesForPrelocked(F); } + void EraseAllCallSitesForPrelocked(Function *F); + + // Erases _all_ call sites regardless of their function. This is used to + // unregister the stub addresses from the StubToResolverMap in + // ~JITResolver(). 
+ void EraseAllCallSitesPrelocked(); }; /// JITResolver - Keep track of, and resolve, call sites for functions that @@ -240,6 +205,8 @@ namespace { LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); } + ~JITResolver(); + /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's /// lazy-compilation stub if it has already been created. void *getLazyFunctionStubIfAvailable(Function *F); @@ -259,8 +226,6 @@ namespace { void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, SmallVectorImpl<void*> &Ptrs); - GlobalValue *invalidateStub(void *Stub); - /// getGOTIndexForAddress - Return a new or existing index in the GOT for /// an address. This function only manages slots, it does not manage the /// contents of the slots or the memory associated with the GOT. @@ -305,6 +270,17 @@ namespace { --I; return I->second; } + /// True if any stubs refer to the given resolver. Only used in an assert(). + /// O(N) + bool ResolverHasStubs(JITResolver* Resolver) const { + MutexGuard guard(Lock); + for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + if (I->second == Resolver) + return true; + } + return false; + } }; /// This needs to be static so that a lazy call stub can access it with no /// context except the address of the stub. @@ -370,9 +346,6 @@ namespace { /// MMI - Machine module info for exception informations MachineModuleInfo* MMI; - // GVSet - a set to keep track of which globals have been seen - SmallPtrSet<const GlobalVariable*, 8> GVSet; - // CurFn - The llvm function being emitted. Only valid during // finishFunction(). const Function *CurFn; @@ -396,16 +369,6 @@ namespace { ValueMap<const Function *, EmittedCode, EmittedFunctionConfig> EmittedFunctions; - // CurFnStubUses - For a given Function, a vector of stubs that it - // references. This facilitates the JIT detecting that a stub is no - // longer used, so that it may be deallocated. 
- DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses; - - // StubFnRefs - For a given pointer to a stub, a set of Functions which - // reference the stub. When the count of a stub's references drops to zero, - // the stub is unused. - DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs; - DILocation PrevDLT; /// Instance of the JIT @@ -494,11 +457,6 @@ namespace { /// function body. void deallocateMemForFunction(const Function *F); - /// AddStubToCurrentFunction - Mark the current function being JIT'd as - /// using the stub at the specified address. Allows - /// deallocateMemForFunction to also remove stubs no longer referenced. - void AddStubToCurrentFunction(void *Stub); - virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn); virtual void emitLabel(uint64_t LabelID) { @@ -529,14 +487,86 @@ namespace { bool MayNeedFarStub); void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size); - unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size); - unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size); + unsigned addSizeOfGlobalsInConstantVal( + const Constant *C, unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist); + unsigned addSizeOfGlobalsInInitializer( + const Constant *Init, unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist); unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF); }; } void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { - JRS->EraseAllCallSitesPrelocked(F); + JRS->EraseAllCallSitesForPrelocked(F); +} + +Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) { + CallSiteToFunctionMapTy::iterator C2F_I = + CallSiteToFunctionMap.find(Stub); + if (C2F_I == CallSiteToFunctionMap.end()) { + // Not a stub. 
+ return NULL; + } + + StubToResolverMap->UnregisterStubResolver(Stub); + + Function *const F = C2F_I->second; +#ifndef NDEBUG + void *RealStub = FunctionToLazyStubMap.lookup(F); + assert(RealStub == Stub && + "Call-site that wasn't a stub passed in to EraseStub"); +#endif + FunctionToLazyStubMap.erase(F); + CallSiteToFunctionMap.erase(C2F_I); + + // Remove the stub from the function->call-sites map, and remove the whole + // entry from the map if that was the last call site. + FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); + assert(F2C_I != FunctionToCallSitesMap.end() && + "FunctionToCallSitesMap broken"); + bool Erased = F2C_I->second.erase(Stub); + (void)Erased; + assert(Erased && "FunctionToCallSitesMap broken"); + if (F2C_I->second.empty()) + FunctionToCallSitesMap.erase(F2C_I); + + return F; +} + +void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) { + FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); + if (F2C == FunctionToCallSitesMap.end()) + return; + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(), + E = F2C->second.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(*I); + bool Erased = CallSiteToFunctionMap.erase(*I); + (void)Erased; + assert(Erased && "Missing call site->function mapping"); + } + FunctionToCallSitesMap.erase(F2C); +} + +void JITResolverState::EraseAllCallSitesPrelocked() { + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (CallSiteToFunctionMapTy::const_iterator + I = CallSiteToFunctionMap.begin(), + E = CallSiteToFunctionMap.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(I->first); + } + CallSiteToFunctionMap.clear(); + FunctionToCallSitesMap.clear(); +} + +JITResolver::~JITResolver() { + // No need to lock because we're in the destructor, and state isn't shared. 
+ state.EraseAllCallSitesPrelocked(); + assert(!StubToResolverMap->ResolverHasStubs(this) && + "Resolver destroyed with stubs still alive."); } /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub @@ -589,20 +619,22 @@ void *JITResolver::getLazyFunctionStub(Function *F) { DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); - // Register this JITResolver as the one corresponding to this call site so - // JITCompilerFn will be able to find it. - StubToResolverMap->RegisterStubResolver(Stub, this); - - // Finally, keep track of the stub-to-Function mapping so that the - // JITCompilerFn knows which function to compile! - state.AddCallSite(locked, Stub, F); - - // If we are JIT'ing non-lazily but need to call a function that does not - // exist yet, add it to the JIT's work list so that we can fill in the stub - // address later. - if (!Actual && !TheJIT->isCompilingLazily()) - if (!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage()) - TheJIT->addPendingFunction(F); + if (TheJIT->isCompilingLazily()) { + // Register this JITResolver as the one corresponding to this call site so + // JITCompilerFn will be able to find it. + StubToResolverMap->RegisterStubResolver(Stub, this); + + // Finally, keep track of the stub-to-Function mapping so that the + // JITCompilerFn knows which function to compile! + state.AddCallSite(locked, Stub, F); + } else if (!Actual) { + // If we are JIT'ing non-lazily but need to call a function that does not + // exist yet, add it to the JIT's work list so that we can fill in the + // stub address later. 
+ assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() && + "'Actual' should have been set above."); + TheJIT->addPendingFunction(F); + } return Stub; } @@ -676,42 +708,6 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, } } -GlobalValue *JITResolver::invalidateStub(void *Stub) { - MutexGuard locked(TheJIT->lock); - - // Remove the stub from the StubToResolverMap. - StubToResolverMap->UnregisterStubResolver(Stub); - - GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - - // Look up the cheap way first, to see if it's a function stub we are - // invalidating. If so, remove it from both the forward and reverse maps. - if (Function *F = state.EraseStub(locked, Stub)) { - return F; - } - - // Otherwise, it might be an indirect symbol stub. Find it and remove it. - for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); - i != e; ++i) { - if (i->second != Stub) - continue; - GlobalValue *GV = i->first; - GM.erase(i); - return GV; - } - - // Lastly, check to see if it's in the ExternalFnToStubMap. - for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(), - e = ExternalFnToStubMap.end(); i != e; ++i) { - if (i->second != Stub) - continue; - ExternalFnToStubMap.erase(i); - break; - } - - return 0; -} - /// JITCompilerFn - This function is called when a lazy compilation stub has /// been entered. It looks up which function this stub corresponds to, compiles /// it if necessary, then returns the resultant function pointer. @@ -797,7 +793,6 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, // that we're returning the same address for the function as any previous // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be // close enough to call. 
- AddStubToCurrentFunction(FnStub); return FnStub; } @@ -814,18 +809,10 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, return TheJIT->getPointerToFunction(F); } - // Otherwise, we may need a to emit a stub, and, conservatively, we - // always do so. - void *StubAddr = Resolver.getLazyFunctionStub(F); - - // Add the stub to the current function's list of referenced stubs, so we can - // deallocate them if the current function is ever freed. It's possible to - // return null from getLazyFunctionStub in the case of a weak extern that - // fails to resolve. - if (StubAddr) - AddStubToCurrentFunction(StubAddr); - - return StubAddr; + // Otherwise, we may need a to emit a stub, and, conservatively, we always do + // so. Note that it's possible to return null from getLazyFunctionStub in the + // case of a weak extern that fails to resolve. + return Resolver.getLazyFunctionStub(F); } void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { @@ -833,24 +820,9 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { // resolved address. void *GVAddress = getPointerToGlobal(V, Reference, false); void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress); - - // Add the stub to the current function's list of referenced stubs, so we can - // deallocate them if the current function is ever freed. 
- AddStubToCurrentFunction(StubAddr); - return StubAddr; } -void JITEmitter::AddStubToCurrentFunction(void *StubAddr) { - assert(CurFn && "Stub added to current function, but current function is 0!"); - - SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn]; - StubsUsed.push_back(StubAddr); - - SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[StubAddr]; - FnRefs.insert(CurFn); -} - void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (!DL.isUnknown()) { DILocation CurDLT = EmissionDetails.MF->getDILocation(DL); @@ -922,11 +894,14 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { } /// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet -/// but are referenced from the constant; put them in GVSet and add their -/// size into the running total Size. - -unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, - unsigned Size) { +/// but are referenced from the constant; put them in SeenGlobals and the +/// Worklist, and add their size into the running total Size. + +unsigned JITEmitter::addSizeOfGlobalsInConstantVal( + const Constant *C, + unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist) { // If its undefined, return the garbage. 
if (isa<UndefValue>(C)) return Size; @@ -948,7 +923,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: { - Size = addSizeOfGlobalsInConstantVal(Op0, Size); + Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist); break; } case Instruction::Add: @@ -964,8 +939,9 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, case Instruction::And: case Instruction::Or: case Instruction::Xor: { - Size = addSizeOfGlobalsInConstantVal(Op0, Size); - Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size); + Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist); + Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size, + SeenGlobals, Worklist); break; } default: { @@ -979,8 +955,10 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, if (C->getType()->getTypeID() == Type::PointerTyID) if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C)) - if (GVSet.insert(GV)) + if (SeenGlobals.insert(GV)) { + Worklist.push_back(GV); Size = addSizeOfGlobal(GV, Size); + } return Size; } @@ -988,15 +966,18 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, /// addSizeOfGLobalsInInitializer - handle any globals that we haven't seen yet /// but are referenced from the given initializer. 
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, - unsigned Size) { +unsigned JITEmitter::addSizeOfGlobalsInInitializer( + const Constant *Init, + unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist) { if (!isa<UndefValue>(Init) && !isa<ConstantVector>(Init) && !isa<ConstantAggregateZero>(Init) && !isa<ConstantArray>(Init) && !isa<ConstantStruct>(Init) && Init->getType()->isFirstClassType()) - Size = addSizeOfGlobalsInConstantVal(Init, Size); + Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist); return Size; } @@ -1007,7 +988,7 @@ unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { unsigned Size = 0; - GVSet.clear(); + SmallPtrSet<const GlobalVariable*, 8> SeenGlobals; for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { @@ -1031,7 +1012,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { // assuming the addresses of the new globals in this module // start at 0 (or something) and adjusting them after codegen // complete. Another possibility is to grab a marker bit in GV. - if (GVSet.insert(GV)) + if (SeenGlobals.insert(GV)) // A variable as yet unseen. Add in its size. Size = addSizeOfGlobal(GV, Size); } @@ -1040,12 +1021,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } DEBUG(dbgs() << "JIT: About to look through initializers\n"); // Look for more globals that are referenced only from initializers. - // GVSet.end is computed each time because the set can grow as we go. 
- for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin(); - I != GVSet.end(); I++) { - const GlobalVariable* GV = *I; + SmallVector<const GlobalVariable*, 8> Worklist( + SeenGlobals.begin(), SeenGlobals.end()); + while (!Worklist.empty()) { + const GlobalVariable* GV = Worklist.back(); + Worklist.pop_back(); if (GV->hasInitializer()) - Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size); + Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size, + SeenGlobals, Worklist); } return Size; @@ -1347,40 +1330,6 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { if (JITEmitDebugInfo) { DR->UnregisterFunction(F); } - - // If the function did not reference any stubs, return. - if (CurFnStubUses.find(F) == CurFnStubUses.end()) - return; - - // For each referenced stub, erase the reference to this function, and then - // erase the list of referenced stubs. - SmallVectorImpl<void *> &StubList = CurFnStubUses[F]; - for (unsigned i = 0, e = StubList.size(); i != e; ++i) { - void *Stub = StubList[i]; - - // If we already invalidated this stub for this function, continue. - if (StubFnRefs.count(Stub) == 0) - continue; - - SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub]; - FnRefs.erase(F); - - // If this function was the last reference to the stub, invalidate the stub - // in the JITResolver. Were there a memory manager deallocateStub routine, - // we could call that at this point too. - if (FnRefs.empty()) { - DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); - StubFnRefs.erase(Stub); - - // Invalidate the stub. If it is a GV stub, update the JIT's global - // mapping for that GV to zero. 
- GlobalValue *GV = Resolver.invalidateStub(Stub); - if (GV) { - TheJIT->updateGlobalMapping(GV, 0); - } - } - } - CurFnStubUses.erase(F); } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 25c3fbd..071c924 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -368,6 +368,7 @@ void format_object_base::home() { /// if no error occurred. raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, unsigned Flags) : pos(0) { + assert(Filename != 0 && "Filename is null"); // Verify that we don't have both "append" and "excl". assert((!(Flags & F_Excl) || !(Flags & F_Append)) && "Cannot specify both 'excl' and 'append' file creation flags!"); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 8044966..577c363 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1350,7 +1350,9 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + if (STI.isTargetDarwin() || hasFP(MF)) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 1c77f27..786dd65 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -734,7 +734,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // multiply register let isCommutable = 1 in def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, - "mul", "\t$dst, $rhs", + "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */ [(set 
tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>, T1DataProcessing<0b1101>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 316567d..6241766 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -131,7 +131,7 @@ def t2addrmode_imm12 : Operand<i32>, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t2addrmode_imm8 := reg - imm8 +// t2addrmode_imm8 := reg +/- imm8 def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; @@ -657,6 +657,32 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// SXTB16 and UXTB16 do not need the .w qualifier. +multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", + [(set GPR:$dst, (opnode GPR:$src))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + // DO variant - disassembly only, no pattern multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { @@ -983,6 +1009,28 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), []>; } +// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are +// for disassembly only. 
+// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 +class T2IldT<bit signed, bits<2> type, string opc> + : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$dst, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 1; // load + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW. +} + +def t2LDRT : T2IldT<0, 0b10, "ldrt">; +def t2LDRBT : T2IldT<0, 0b00, "ldrbt">; +def t2LDRHT : T2IldT<0, 0b01, "ldrht">; +def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">; +def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">; + // Store defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; @@ -1037,9 +1085,98 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; +// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly +// only. +// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 +class T2IstT<bits<2> type, string opc> + : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc, + "\t$src, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = 0; // not signed + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 0; // store + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW +} + +def t2STRT : T2IstT<0b10, "strt">; +def t2STRBT : T2IstT<0b00, "strbt">; +def t2STRHT : T2IstT<0b01, "strht">; // FIXME: ldrd / strd pre / post variants +// T2Ipl (Preload Data/Instruction) signals the memory system of possible future +// data/instruction access. These are for disassembly only. 
+multiclass T2Ipl<bit instr, bit write, string opc> { + + def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 1; // U = 1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + } + + def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // U = 0 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-8} = 0b1100; + } + + // A8.6.118 #0 and #-0 differs. Translates -0 to -1, -1 to -2, ..., etc. + def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc, + "\t[pc, ${imm:negzero}]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = ?; // add = (U == 1) + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{19-16} = 0b1111; // Rn = 0b1111 + let Inst{15-12} = 0b1111; + } + + def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc, + "\t[$base, $a]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + let Inst{5-4} = 0b00; // no shift is applied + } + + def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc, + "\t[$base, $a, lsl $shamt]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + } +} + +defm t2PLD : T2Ipl<0, 0, "pld">; +defm t2PLDW : T2Ipl<0, 1, "pldw">; +defm t2PLI : T2Ipl<1, 0, "pli">; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
// @@ -1149,7 +1286,7 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5b4f02d..19f1e3b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -761,6 +761,11 @@ static bool isMemoryOp(const MachineInstr *MI) { MI->getOperand(0).isUndef()) return false; + // Likewise don't mess with references to undefined addresses. + if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && + MI->getOperand(1).isUndef()) + return false; + int Opcode = MI->getOpcode(); switch (Opcode) { default: break; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index b61ce29..163d1e9 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -778,9 +778,19 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - return (MI->getOpcode() == ARM::tRestore && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); + if (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) + return true; + else if (MI->getOpcode() == ARM::tPOP) { + // The first three operands are predicates and such. The last two are + // imp-def and imp-use of SP. Check everything in between. 
+ for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + return false; } void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, @@ -794,13 +804,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); + const unsigned *CSRegs = getCalleeSavedRegs(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -836,6 +846,9 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, } if (VARegSaveSize) { + // Move back past the callee-saved register restoration + while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) + ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. 
@@ -845,6 +858,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); + // erase the old tBX_RET instruction MBB.erase(MBBI); } } diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td index 38ada69..bde8819 100644 --- a/lib/Target/Alpha/AlphaCallingConv.td +++ b/lib/Target/Alpha/AlphaCallingConv.td @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// def RetCC_Alpha : CallingConv<[ // i64 is returned in register R0 - CCIfType<[i64], CCAssignToReg<[R0]>>, + // R1 is an llvm extension, I don't know what gcc does + CCIfType<[i64], CCAssignToReg<[R0,R1]>>, // f32 / f64 are returned in F0/F1 CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>> diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 5ef3c6b..3e17a51 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -34,5 +34,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) 
ExceptionsType = ExceptionHandling::None; + + // SPU assembly requires ".section" before ".bss" + UsesELFSectionDirectiveForBSS = true; } diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp index 46cc819..f1bdb12 100644 --- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp +++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp @@ -15,7 +15,8 @@ using namespace llvm; Target llvm::ThePIC16Target, llvm::TheCooperTarget; extern "C" void LLVMInitializePIC16TargetInfo() { - RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]"); + RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16", + "PIC16 14-bit [experimental]"); RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]"); } diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index d6b45be..f6753a6 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -204,9 +204,10 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. -/// Override AsmPrinter implementation to handle delay slots -bool SparcAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) - const { +/// +/// This overrides AsmPrinter's implementation to handle delay slots. +bool SparcAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. 
if (MBB->isLandingPad() || MBB->pred_empty()) @@ -224,10 +225,10 @@ bool SparcAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock if (!Pred->isLayoutSuccessor(MBB)) return false; - // Check if the last terminator is an unconditional branch + // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); - while( I != Pred->begin() && !(--I)->getDesc().isTerminator() ) - ; /* Noop */ + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + ; // Noop return I == Pred->end() || !I->getDesc().isBarrier(); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 17366ee..98e3f4e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } case Instruction::GetElementPtr: { + X86AddressMode SavedAM = AM; + // Pattern-match simple GEPs. uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; @@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM); + if (X86SelectAddress(U->getOperand(0), AM)) + return true; + + // If we couldn't merge the sub value into this addr mode, revert back to + // our address and just match the value instead of completely failing. + AM = SavedAM; + break; unsupported_gep: // Ok, the GEP indices weren't all covered. break; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 08030e0..3fad8ad 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -413,6 +413,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { } void X86DAGToDAGISel::PreprocessISelDAG() { + // OptForSize is used in pattern predicates that isel is matching. 
OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e2b8193..8384ab7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -990,7 +990,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); @@ -2236,7 +2235,8 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII) { - int FI; + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) @@ -2252,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && Def->getOperand(1).isFI()) { FI = Def->getOperand(1).getIndex(); - if (MFI->getObjectSize(FI) != Flags.getByValSize()) - return false; + Bytes = Flags.getByValSize(); } else return false; } - } else { - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); - if (!Ld) + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. 
+ // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); - } + } else + return false; + assert(FI != INT_MAX); if (!MFI->isFixedObjectIndex(FI)) return false; - return Offset == MFI->getObjectOffset(FI); + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible @@ -9174,58 +9179,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformANDCombine - Look for SSE and instructions of this form: -/// (and x, (build_vector signbit,signbit,signbit,signbit)). If there -/// exists a use of a build_vector that's the bitwise complement of the mask, -/// then transform the node to -/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~sb,~sb,~sb,~sb)). -static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { - EVT VT = N->getValueType(0); - if (!VT.isVector() || !VT.isInteger()) - return SDValue(); - - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse()) - return SDValue(); - - if (N1.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - SmallVector<SDValue, 8> Mask; - Mask.reserve(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Arg = N1.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - Mask.push_back(Arg); - continue; - } - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg); - if (!C) - return SDValue(); - if (!C->getAPIntValue().isSignBit() && - !C->getAPIntValue().isMaxSignedValue()) - return SDValue(); - Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT)); - } - N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT, - &Mask[0], 
NumElts); - if (!N1.use_empty()) { - unsigned Bits = EltVT.getSizeInBits(); - Mask.clear(); - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT)); - SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VT, &Mask[0], NumElts); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, - N0, NewMask), N1); - } - } - - return SDValue(); -} /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. @@ -9755,7 +9708,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); - case ISD::AND: return PerformANDCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: @@ -9838,11 +9790,20 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && - AsmPieces[0] == "rorw" && + (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") && AsmPieces[1] == "$$8," && AsmPieces[2] == "${0:w}" && - IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { - return LowerToBSwap(CI); + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + AsmPieces.clear(); + SplitString(IA->getConstraintString().substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + return LowerToBSwap(CI); + } } break; case 3: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index cfe71a5..d46b946 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ 
b/lib/Target/X86/X86InstrInfo.td @@ -1050,7 +1050,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), // // Extra precision multiplication -let Defs = [AL,AH,EFLAGS], Uses = [AL] in + +// AL is really implied by AX, but the registers in Defs must match the +// SDNode results (i8, i32). +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the @@ -1068,7 +1071,7 @@ def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*GR32 -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. @@ -1090,7 +1093,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), } let neverHasSideEffects = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1100,7 +1103,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*GR32 let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), "imul{b}\t$src", []>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1113,7 +1116,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), } // neverHasSideEffects // unsigned division/remainder -let Defs = [AX,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1123,7 +1126,7 @@ let
Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "div{l}\t$src", []>; let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1136,7 +1139,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), } // Signed division/remainder. -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1146,7 +1149,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "idiv{l}\t$src", []>; let mayLoad = 1, mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 09accb6..07fb15e 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -117,11 +117,11 @@ public: Instruction *visitUDiv(BinaryOperator &I); Instruction *visitSDiv(BinaryOperator &I); Instruction *visitFDiv(BinaryOperator &I); - Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *visitAnd(BinaryOperator &I); - Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + 
Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C); Instruction *visitOr (BinaryOperator &I); @@ -327,8 +327,8 @@ private: Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); - Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, Instruction &IB); + Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 86673f8..3fb3de7 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -137,80 +137,44 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { /// opcode and two operands into either a constant true or false, or a brand /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. 
-static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) { +static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + CmpInst::Predicate Pred; switch (Code) { default: assert(0 && "Illegal ICmp code!"); - case 0: - return ConstantInt::getFalse(LHS->getContext()); - case 1: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); - case 2: - return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); - case 3: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); - case 4: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); - case 5: - return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); - case 6: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); - case 7: - return ConstantInt::getTrue(LHS->getContext()); + case 0: // False. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + case 2: Pred = ICmpInst::ICMP_EQ; break; + case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 5: Pred = ICmpInst::ICMP_NE; break; + case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 7: // True. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); } + return Builder->CreateICmp(Pred, LHS, RHS); } /// getFCmpValue - This is the complement of getFCmpCode, which turns an /// opcode and two operands into either a FCmp instruction. isordered is passed /// in to determine which kind of predicate to use in the new fcmp instruction. 
static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS) { + Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + CmpInst::Predicate Pred; switch (code) { - default: llvm_unreachable("Illegal FCmp code!"); - case 0: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); - case 1: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); - case 2: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); - case 3: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); - case 4: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); - case 5: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); - case 6: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(LHS->getContext()); + default: assert(0 && "Illegal FCmp code!"); + case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break; + case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break; + case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break; + case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break; + case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; + case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; + case 6: Pred = isordered ? 
FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; + case 7: return ConstantInt::getTrue(LHS->getContext()); } + return Builder->CreateFCmp(Pred, LHS, RHS); } /// PredicatesFoldable - Return true if both predicates match sign or if at @@ -355,40 +319,39 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, /// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. -Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, - Instruction &IB) { +Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside) { assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && "Lo is not <= Hi in range emission code!"); if (Inside) { if (Lo == Hi) // Trivially false. - return new ICmpInst(ICmpInst::ICMP_NE, V, V); + return ConstantInt::getFalse(V->getContext()); // V >= Min && V < Hi --> V < Hi if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); - return new ICmpInst(pred, V, Hi); + return Builder->CreateICmp(pred, V, Hi); } // Emit V-Lo <u Hi-Lo Constant *NegLo = ConstantExpr::getNeg(Lo); Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); + return Builder->CreateICmpULT(Add, UpperBound); } if (Lo == Hi) // Trivially true. - return new ICmpInst(ICmpInst::ICMP_EQ, V, V); + return ConstantInt::getTrue(V->getContext()); // V < Min || V >= Hi -> V > Hi-1 Hi = SubOne(cast<ConstantInt>(Hi)); if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); - return new ICmpInst(pred, V, Hi); + return Builder->CreateICmp(pred, V, Hi); } // Emit V-Lo >u Hi-1-Lo @@ -396,7 +359,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); + return Builder->CreateICmpUGT(Add, LowerBound); } // isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with @@ -472,8 +435,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. -Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) @@ -486,11 +448,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. 
- return ReplaceInstUsesWith(I, RV); + return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -506,13 +464,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, if (LHSCC == ICmpInst::ICMP_ULT && LHSCst->getValue().isPowerOf2()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } } @@ -562,33 +520,32 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; } case ICmpInst::ICMP_NE: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); + return Builder->CreateICmpULT(Val, LHSCst); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); + return Builder->CreateICmpSLT(Val, LHSCst); break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 case ICmpInst::ICMP_SGT: // (X != 13 & X 
s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_NE: if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, - ConstantInt::get(Add->getType(), 1)); + return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1)); } break; // (X != 13 & X != 15) -> no change } @@ -598,12 +555,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change break; } @@ -613,12 +570,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change break; } @@ -628,16 +585,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: 
llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 - return new ICmpInst(LHSCC, Val, RHSCst); + return Builder->CreateICmp(LHSCC, Val, RHSCst); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, false, true, I); + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true); case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change break; } @@ -647,16 +603,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 - return new ICmpInst(LHSCC, Val, RHSCst); + return Builder->CreateICmp(LHSCC, Val, RHSCst); break; // (X s> 13 & X != 15) -> no change case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, true, true, I); + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true); case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change break; } @@ -666,9 +621,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, return 0; } -Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { - +/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). 
NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) @@ -677,17 +633,15 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, // If either of the constants are nans, then the whole thing returns // false. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); + return ConstantInt::getFalse(LHS->getContext()); + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return 0; } @@ -705,14 +659,13 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, RHS); + return RHS; if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, LHS); + return LHS; bool Op0Ordered; bool Op1Ordered; @@ -727,14 +680,14 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, // uno && ueq -> uno && (uno || eq) -> ueq // ord && olt -> ord && (ord && lt) -> olt if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, RHS); + return RHS; // uno && oeq -> uno && (ord && eq) -> false // uno && ord -> false if (!Op0Ordered) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); // ord && ueq -> ord && (uno || eq) -> oeq - return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); + return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder); } } @@ -930,14 +883,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) - if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) - return Res; + if (Value *Res = FoldAndOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); // If and'ing two fcmp, try combine them into one. 
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) - return Res; + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); // fold (and (cast A), (cast B)) -> (cast (and A, B)) @@ -960,19 +913,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // cast is otherwise not optimizable. This happens for vector sexts. if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) - if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) { - InsertNewInstBefore(Res, I); + if (Value *Res = FoldAndOfICmps(LHS, RHS)) return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - } // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the // cast is otherwise not optimizable. This happens for vector sexts. if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) { - InsertNewInstBefore(Res, I); + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - } } } @@ -1179,8 +1128,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, } /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. 
-Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) @@ -1193,11 +1141,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. - return ReplaceInstUsesWith(I, RV); + return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -1211,7 +1155,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } // From here on, we only handle: @@ -1263,7 +1207,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); + return Builder->CreateICmpULT(Add, AddCST); } break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change @@ -1272,7 +1216,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; } break; case ICmpInst::ICMP_NE: @@ -1281,11 +1225,11 @@ Instruction 
*InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); } break; case ICmpInst::ICMP_ULT: @@ -1297,14 +1241,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // If RHSCst is [us]MAXINT, it is always false. Not handling // this can cause overflow. if (RHSCst->isMaxValue(false)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - false, false, I); + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false); case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change break; } @@ -1318,14 +1261,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // If RHSCst is [us]MAXINT, it is always false. Not handling // this can cause overflow. 
if (RHSCst->isMaxValue(true)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - true, false, I); + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false); case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change break; } @@ -1335,12 +1277,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change break; } @@ -1350,12 +1292,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change break; } @@ -1364,8 +1306,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, return 0; } -Instruction 
*InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { +/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_UNO && RHS->getPredicate() == FCmpInst::FCMP_UNO && LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { @@ -1374,20 +1318,18 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, // If either of the constants are nans, then the whole thing returns // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); // Otherwise, no need to compare the two constants, compare the // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); return 0; } @@ -1404,14 +1346,13 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 
if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, - Op0LHS, Op0RHS); + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, RHS); + return RHS; if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, LHS); + return LHS; bool Op0Ordered; bool Op1Ordered; unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); @@ -1419,11 +1360,7 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0Ordered == Op1Ordered) { // If both are ordered or unordered, return a new fcmp with // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value... 
- return ReplaceInstUsesWith(I, RV); + return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); } } return 0; @@ -1686,14 +1623,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) - if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) - return Res; + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { @@ -1717,19 +1654,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // cast is otherwise not optimizable. This happens for vector sexts. if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) - if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) { - InsertNewInstBefore(Res, I); + if (Value *Res = FoldOrOfICmps(LHS, RHS)) return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - } // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the // cast is otherwise not optimizable. This happens for vector sexts. 
if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) { - InsertNewInstBefore(Res, I); + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - } } } } @@ -2005,11 +1938,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. - return ReplaceInstUsesWith(I, RV); + return ReplaceInstUsesWith(I, + getICmpValue(isSigned, Code, Op0, Op1, Builder)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 835d149..a241f169 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -304,29 +304,39 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { - const Type *ReturnTy = CI.getType(); - Value *Op1 = II->getOperand(1); - bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); - // We need target data for just about everything so depend on it. if (!TD) break; + const Type *ReturnTy = CI.getType(); + bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + // Get to the real allocated thing and offset as fast as possible. - Op1 = Op1->stripPointerCasts(); + Value *Op1 = II->getOperand(1)->stripPointerCasts(); // If we've stripped down to a single global variable that we // can know the size of then just return that. 
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { if (GV->hasDefinitiveInitializer()) { Constant *C = GV->getInitializer(); - uint64_t globalSize = TD->getTypeAllocSize(C->getType()); - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize)); + uint64_t GlobalSize = TD->getTypeAllocSize(C->getType()); + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize)); } else { + // Can't determine size of the GV. Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); return ReplaceInstUsesWith(CI, RetVal); } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { - + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { + // Get alloca size. + if (AI->getAllocatedType()->isSized()) { + uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + if (AI->isArrayAllocation()) { + const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); + if (!C) break; + AllocaSize *= C->getZExtValue(); + } + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize)); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { // Only handle constant GEPs here. if (CE->getOpcode() != Instruction::GetElementPtr) break; GEPOperator *GEP = cast<GEPOperator>(CE); @@ -361,6 +371,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return ReplaceInstUsesWith(CI, RetVal); } + + // Do not return "I don't know" here. Later optimization passes could + // make it possible to evaluate objectsize to a constant. 
+ break; } case Intrinsic::bswap: // bswap(bswap(x)) -> x diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 518af74..72fd558 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -877,25 +877,26 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow) + if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) + if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); + return ReplaceInstUsesWith(ICI, + InsertRangeTest(X, LoBound, HiBound, DivIsSigned, + true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); - else if (HiOverflow) + if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) + if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, false)); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. 
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index fba8354..65f0393 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -266,6 +266,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // and if TD isn't around, we can't handle the mixed case. bool isVolatile = FirstLI->isVolatile(); unsigned LoadAlignment = FirstLI->getAlignment(); + unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the // load and the PHI. @@ -290,6 +291,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // the load and the PHI. if (LI->isVolatile() != isVolatile || LI->getParent() != PN.getIncomingBlock(i) || + LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) return 0; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 12827b6..5aca9cdc 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -597,19 +597,35 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { /// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively /// add its operands as factors, otherwise add V to the list of factors. +/// +/// Ops is the top-level list of add operands we're trying to factor. static void FindSingleUseMultiplyFactors(Value *V, - SmallVectorImpl<Value*> &Factors) { + SmallVectorImpl<Value*> &Factors, + const SmallVectorImpl<ValueEntry> &Ops, + bool IsRoot) { BinaryOperator *BO; - if ((!V->hasOneUse() && !V->use_empty()) || + if (!(V->hasOneUse() || V->use_empty()) || // More than one use. 
!(BO = dyn_cast<BinaryOperator>(V)) || BO->getOpcode() != Instruction::Mul) { Factors.push_back(V); return; } + // If this value has a single use because it is another input to the add + // tree we're reassociating and we dropped its use, it actually has two + // uses and we can't factor it. + if (!IsRoot) { + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (Ops[i].Op == V) { + Factors.push_back(V); + return; + } + } + + // Otherwise, add the LHS and RHS to the list of factors. - FindSingleUseMultiplyFactors(BO->getOperand(1), Factors); - FindSingleUseMultiplyFactors(BO->getOperand(0), Factors); + FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false); + FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false); } /// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' @@ -753,7 +769,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Compute all of the factors of this added value. SmallVector<Value*, 8> Factors; - FindSingleUseMultiplyFactors(BOp, Factors); + FindSingleUseMultiplyFactors(BOp, Factors, Ops, true); assert(Factors.size() > 1 && "Bad linearize!"); // Add one to FactorOccurrences for each unique factor in this op. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index cde214b..86ddeac 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -17,6 +17,7 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -67,496 +68,14 @@ public: Context = &CI->getCalledFunction()->getContext(); return CallOptimizer(CI->getCalledFunction(), CI, B); } - - /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. 
- Value *CastToCStr(Value *V, IRBuilder<> &B); - - /// EmitStrLen - Emit a call to the strlen function to the builder, for the - /// specified pointer. Ptr is required to be some pointer type, and the - /// return value has 'intptr_t' type. - Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); - - /// EmitStrChr - Emit a call to the strchr function to the builder, for the - /// specified pointer and character. Ptr is required to be some pointer type, - /// and the return value has 'i8*' type. - Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); - - /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the - /// specified pointer arguments. - Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B); - - /// EmitMemCpy - Emit a call to the memcpy function to the builder. This - /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B); - - /// EmitMemMove - Emit a call to the memmove function to the builder. This - /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B); - - /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is - /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. - Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); - - /// EmitMemCmp - Emit a call to the memcmp function. - Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B); - - /// EmitMemSet - Emit a call to the memset function - Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B); - - /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' - /// (e.g. 'floor'). This function is known to take a single of type matching - /// 'Op' and returns one value with the same type. 
If 'Op' is a long double, - /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f' - /// suffix. - Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, - const AttrListPtr &Attrs); - - /// EmitPutChar - Emit a call to the putchar function. This assumes that Char - /// is an integer. - Value *EmitPutChar(Value *Char, IRBuilder<> &B); - - /// EmitPutS - Emit a call to the puts function. This assumes that Str is - /// some pointer. - void EmitPutS(Value *Str, IRBuilder<> &B); - - /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is - /// an i32, and File is a pointer to FILE. - void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); - - /// EmitFPutS - Emit a call to the puts function. Str is required to be a - /// pointer and File is a pointer to FILE. - void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); - - /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is - /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. - void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); - }; } // End anonymous namespace. -/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. -Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); -} - -/// EmitStrLen - Emit a call to the strlen function to the builder, for the -/// specified pointer. This always returns an integer value of size intptr_t. 
-Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); - - Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), - TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - NULL); - CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); - if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitStrChr - Emit a call to the strchr function to the builder, for the -/// specified pointer and character. Ptr is required to be some pointer type, -/// and the return value has 'i8*' type. -Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI = - AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - - const Type *I8Ptr = Type::getInt8PtrTy(*Context); - const Type *I32Ty = Type::getInt32Ty(*Context); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), - I8Ptr, I8Ptr, I32Ty, NULL); - CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), - ConstantInt::get(I32Ty, C), "strchr"); - if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the -/// specified pointer arguments. 
-Value *LibCallOptimization::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - const Type *I8Ptr = Type::getInt8PtrTy(*Context); - Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2), - I8Ptr, I8Ptr, I8Ptr, NULL); - CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), - "strcpy"); - if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always -/// expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B) { - Module *M = Caller->getParent(); - const Type *Ty = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); - Dst = CastToCStr(Dst, B); - Src = CastToCStr(Src, B); - return B.CreateCall4(MemCpy, Dst, Src, Len, - ConstantInt::get(Type::getInt32Ty(*Context), Align)); -} - -/// EmitMemMove - Emit a call to the memmove function to the builder. This -/// always expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B) { - Module *M = Caller->getParent(); - const Type *Ty = TD->getIntPtrType(*Context); - Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); - Dst = CastToCStr(Dst, B); - Src = CastToCStr(Src, B); - Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align); - return B.CreateCall4(MemMove, Dst, Src, Len, A); -} - -/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is -/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. 
-Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt32Ty(*Context), - TD->getIntPtrType(*Context), - NULL); - CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); - - if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitMemCmp - Emit a call to the memcmp function. -Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); - - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), NULL); - CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), - Len, "memcmp"); - - if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitMemSet - Emit a call to the memset function -Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memset; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1); - return B.CreateCall4(MemSet, CastToCStr(Dst, B), 
Val, Len, Align); -} - -/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. -/// 'floor'). This function is known to take a single of type matching 'Op' and -/// returns one value with the same type. If 'Op' is a long double, 'l' is -/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. -Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B, - const AttrListPtr &Attrs) { - char NameBuffer[20]; - if (!Op->getType()->isDoubleTy()) { - // If we need to add a suffix, copy into NameBuffer. - unsigned NameLen = strlen(Name); - assert(NameLen < sizeof(NameBuffer)-2); - memcpy(NameBuffer, Name, NameLen); - if (Op->getType()->isFloatTy()) - NameBuffer[NameLen] = 'f'; // floorf - else - NameBuffer[NameLen] = 'l'; // floorl - NameBuffer[NameLen+1] = 0; - Name = NameBuffer; - } - - Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), NULL); - CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitPutChar - Emit a call to the putchar function. This assumes that Char -/// is an integer. -Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { - Module *M = Caller->getParent(); - Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), NULL); - CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, - Type::getInt32Ty(*Context), - /*isSigned*/true, - "chari"), - "putchar"); - - if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitPutS - Emit a call to the puts function. This assumes that Str is -/// some pointer. 
-void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - NULL); - CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); - if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - -} - -/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is -/// an integer and File is a pointer to FILE. -void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), File->getType(), - NULL); - else - F = M->getOrInsertFunction("fputc", - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), - File->getType(), NULL); - Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true, - "chari"); - CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} - -/// EmitFPutS - Emit a call to the puts function. Str is required to be a -/// pointer and File is a pointer to FILE. 
-void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - File->getType(), NULL); - else - F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - File->getType(), NULL); - CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} - -/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is -/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. 
-void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), - TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), - File->getType(), NULL); - else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), - File->getType(), NULL); - CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(*Context), 1), File); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// -/// GetStringLengthH - If we can compute the length of the string pointed to by -/// the specified pointer, return 'len+1'. If we can't, return 0. -static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { - // Look through noop bitcast instructions. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - return GetStringLengthH(BCI->getOperand(0), PHIs); - - // If this is a PHI node, there are two cases: either we have already seen it - // or we haven't. - if (PHINode *PN = dyn_cast<PHINode>(V)) { - if (!PHIs.insert(PN)) - return ~0ULL; // already in the set. - - // If it was new, see if all the input strings are the same length. 
- uint64_t LenSoFar = ~0ULL; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); - if (Len == 0) return 0; // Unknown length -> unknown. - - if (Len == ~0ULL) continue; - - if (Len != LenSoFar && LenSoFar != ~0ULL) - return 0; // Disagree -> unknown. - LenSoFar = Len; - } - - // Success, all agree. - return LenSoFar; - } - - // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) - if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); - if (Len1 == 0) return 0; - uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); - if (Len2 == 0) return 0; - if (Len1 == ~0ULL) return Len2; - if (Len2 == ~0ULL) return Len1; - if (Len1 != Len2) return 0; - return Len1; - } - - // If the value is not a GEP instruction nor a constant expression with a - // GEP instruction, then return unknown. - User *GEP = 0; - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { - GEP = GEPI; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (CE->getOpcode() != Instruction::GetElementPtr) - return 0; - GEP = CE; - } else { - return 0; - } - - // Make sure the GEP has exactly three arguments. - if (GEP->getNumOperands() != 3) - return 0; - - // Check to make sure that the first operand of the GEP is an integer and - // has value 0 so that we are sure we're indexing into the initializer. - if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { - if (!Idx->isZero()) - return 0; - } else - return 0; - - // If the second index isn't a ConstantInt, then this is a variable index - // into the array. If this occurs, we can't say anything meaningful about - // the string. 
- uint64_t StartIdx = 0; - if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) - StartIdx = CI->getZExtValue(); - else - return 0; - - // The GEP instruction, constant or instruction, must reference a global - // variable that is a constant and is initialized. The referenced constant - // initializer is the array that we'll use for optimization. - GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasInitializer() || - GV->mayBeOverridden()) - return 0; - Constant *GlobalInit = GV->getInitializer(); - - // Handle the ConstantAggregateZero case, which is a degenerate case. The - // initializer is constant zero so the length of the string must be zero. - if (isa<ConstantAggregateZero>(GlobalInit)) - return 1; // Len = 0 offset by 1. - - // Must be a Constant Array - ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) - return false; - - // Get the number of elements in the array - uint64_t NumElts = Array->getType()->getNumElements(); - - // Traverse the constant array from StartIdx (derived above) which is - // the place the GEP refers to in the array. - for (unsigned i = StartIdx; i != NumElts; ++i) { - Constant *Elt = Array->getOperand(i); - ConstantInt *CI = dyn_cast<ConstantInt>(Elt); - if (!CI) // This array isn't suitable, non-int initializer. - return 0; - if (CI->isZero()) - return i-StartIdx+1; // We found end of string, success! - } - - return 0; // The array isn't null terminated, conservatively return 'unknown'. -} - -/// GetStringLength - If we can compute the length of the string pointed to by -/// the specified pointer, return 'len+1'. If we can't, return 0. 
-static uint64_t GetStringLength(Value *V) { - if (!V->getType()->isPointerTy()) return 0; - - SmallPtrSet<PHINode*, 32> PHIs; - uint64_t Len = GetStringLengthH(V, PHIs); - // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return - // an empty string as a length. - return Len == ~0ULL ? 1 : Len; -} - /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { @@ -613,7 +132,7 @@ struct StrCatOpt : public LibCallOptimization { void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B); + Value *DstLen = EmitStrLen(Dst, B, TD); // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -623,7 +142,7 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD); } }; @@ -701,7 +220,8 @@ struct StrChrOpt : public LibCallOptimization { return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. 
- ConstantInt::get(TD->getIntPtrType(*Context), Len), B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD); } // Otherwise, the character is a constant, see if the first argument is @@ -772,7 +292,7 @@ struct StrCmpOpt : public LibCallOptimization { return EmitMemCmp(Str1P, Str2P, ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B); + std::min(Len1, Len2)), B, TD); } return 0; @@ -852,7 +372,7 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); return Dst; } }; @@ -881,7 +401,7 @@ struct StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, - B); + B, TD); return Dst; } @@ -901,7 +421,7 @@ struct StrNCpyOpt : public LibCallOptimization { // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); return Dst; } @@ -993,7 +513,7 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). 
if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), + return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD), CI->getType()); return 0; } @@ -1061,7 +581,8 @@ struct MemCpyOpt : public LibCallOptimization { return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + EmitMemCpy(CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), 1, B, TD); return CI->getOperand(1); } }; @@ -1082,7 +603,8 @@ struct MemMoveOpt : public LibCallOptimization { return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + EmitMemMove(CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), 1, B, TD); return CI->getOperand(1); } }; @@ -1105,7 +627,7 @@ struct MemSetOpt : public LibCallOptimization { // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); return CI->getOperand(1); } }; @@ -1130,11 +652,14 @@ struct MemCpyChkOpt : public LibCallOptimization { FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) + ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!ObjSizeCI) return 0; - if (SizeCI->isAllOnesValue()) { - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (ObjSizeCI->isAllOnesValue() || + (SizeCI && ObjSizeCI->getValue().uge(SizeCI->getValue()))) { + EmitMemCpy(CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), 1, B, TD); return CI->getOperand(1); } @@ -1158,13 +683,15 @@ struct MemSetChkOpt : public LibCallOptimization { 
FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) + ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!ObjSizeCI) return 0; - if (SizeCI->isAllOnesValue()) { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (ObjSizeCI->isAllOnesValue() || + (SizeCI && ObjSizeCI->getValue().uge(SizeCI->getValue()))) { Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); return CI->getOperand(1); } @@ -1188,12 +715,14 @@ struct MemMoveChkOpt : public LibCallOptimization { FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) + ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!ObjSizeCI) return 0; - if (SizeCI->isAllOnesValue()) { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (ObjSizeCI->isAllOnesValue() || + (SizeCI && ObjSizeCI->getValue().uge(SizeCI->getValue()))) { EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), - 1, B); + 1, B, TD); return CI->getOperand(1); } @@ -1209,8 +738,8 @@ struct StrCpyChkOpt : public LibCallOptimization { !FT->getParamType(1)->isPointerTy()) return 0; - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); - if (!SizeCI) + ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!ObjSizeCI) return 0; // If a) we don't have any length information, or b) we know this will @@ -1218,9 +747,9 @@ struct StrCpyChkOpt : public LibCallOptimization { // strcpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
- if (SizeCI->isAllOnesValue() || - SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) - return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B); + if (ObjSizeCI->isAllOnesValue() || + ObjSizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) + return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD); return 0; } @@ -1512,7 +1041,7 @@ struct PrintFOpt : public LibCallOptimization { // in case there is an error writing to stdout. if (FormatStr.size() == 1) { Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), - FormatStr[0]), B); + FormatStr[0]), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1526,7 +1055,7 @@ struct PrintFOpt : public LibCallOptimization { Constant *C = ConstantArray::get(*Context, FormatStr, true); C = new GlobalVariable(*Callee->getParent(), C->getType(), true, GlobalVariable::InternalLinkage, C, "str"); - EmitPutS(C, B); + EmitPutS(C, B, TD); return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), FormatStr.size()+1); } @@ -1535,7 +1064,7 @@ struct PrintFOpt : public LibCallOptimization { // printf("%c", chr) --> putchar(*(i8*)dst) if (FormatStr == "%c" && CI->getNumOperands() > 2 && CI->getOperand(2)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getOperand(2), B); + Value *Res = EmitPutChar(CI->getOperand(2), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); @@ -1545,7 +1074,7 @@ struct PrintFOpt : public LibCallOptimization { if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && CI->getOperand(2)->getType()->isPointerTy() && CI->use_empty()) { - EmitPutS(CI->getOperand(2), B); + EmitPutS(CI->getOperand(2), B, TD); return CI; } return 0; @@ -1582,8 +1111,8 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. 
- ConstantInt::get - (TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()+1), 1, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1614,11 +1143,11 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getOperand(3)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getOperand(3), B); + Value *Len = EmitStrLen(CI->getOperand(3), B, TD); Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B); + EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1654,7 +1183,7 @@ struct FWriteOpt : public LibCallOptimization { // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); - EmitFPutC(Char, CI->getOperand(4), B); + EmitFPutC(Char, CI->getOperand(4), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1682,7 +1211,7 @@ struct FPutsOpt : public LibCallOptimization { if (!Len) return 0; EmitFWrite(CI->getOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getOperand(2), B); + CI->getOperand(2), B, TD); return CI; // Known to have no uses (see above). 
} }; @@ -1716,7 +1245,7 @@ struct FPrintFOpt : public LibCallOptimization { EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getOperand(1), B); + CI->getOperand(1), B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1729,7 +1258,7 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> *(i8*)dst = chr if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getOperand(3), CI->getOperand(1), B); + EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1737,7 +1266,7 @@ struct FPrintFOpt : public LibCallOptimization { // fprintf(F, "%s", str) -> fputs(str, F) if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getOperand(3), CI->getOperand(1), B); + EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD); return CI; } return 0; diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp new file mode 100644 index 0000000..2ea4bb6 --- /dev/null +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -0,0 +1,324 @@ +//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some functions that will create standard C libcalls. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Type.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Intrinsics.h"
+
+using namespace llvm;
+
+/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+/// The cast, when one is needed, is emitted through builder B and is named
+/// "cstr".
+Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
+  return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+}
+
+/// EmitStrLen - Emit a call to the strlen function to the builder, for the
+/// specified pointer. This always returns an integer value of size intptr_t.
+/// TD is dereferenced to obtain the intptr_t type, so it must not be null.
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+  // The declaration is looked up (or created) in the module that owns the
+  // block the builder is currently inserting into.
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // Attribute slot 1 marks the pointer argument nocapture; slot ~0u carries
+  // the readonly/nounwind attributes.
+  AttributeWithIndex AWI[2];
+  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+  AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+                                   Attribute::NoUnwind);
+
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  // Prototype requested: intptr_t strlen(i8*).
+  Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2),
+                                            TD->getIntPtrType(Context),
+                                            B.getInt8PtrTy(),
+                                            NULL);
+  CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+  // When the callee resolves to a Function, copy its calling convention onto
+  // the call so the two agree.
+  if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+
+  return CI;
+}
+
+/// EmitStrChr - Emit a call to the strchr function to the builder, for the
+/// specified pointer and character. Ptr is required to be some pointer type,
+/// and the return value has 'i8*' type.
+Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+                        const TargetData *TD) {
+  // TD is not consulted here; the prototype only needs i8* and i32.
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // strchr is declared readonly and nounwind.
+  AttributeWithIndex AWI =
+    AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+
+  const Type *I8Ptr = B.getInt8PtrTy();
+  const Type *I32Ty = B.getInt32Ty();
+  // Prototype requested: i8* strchr(i8*, i32).
+  Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1),
+                                            I8Ptr, I8Ptr, I32Ty, NULL);
+  CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
+                               ConstantInt::get(I32Ty, C), "strchr");
+  // When the callee resolves to a Function, copy its calling convention onto
+  // the call so the two agree.
+  if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+  return CI;
+}
+
+/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+/// specified pointer arguments. Dst and Src may be any pointer type; both are
+/// cast to i8* before the call. Returns the emitted call, whose type is i8*.
+Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                        const TargetData *TD) {
+  // TD is not consulted here; the prototype only needs i8*.
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // The source pointer (argument 2) is nocapture; strcpy itself is nounwind.
+  AttributeWithIndex AWI[2];
+  AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+  AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+  const Type *I8Ptr = B.getInt8PtrTy();
+  Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2),
+                                         I8Ptr, I8Ptr, I8Ptr, NULL);
+  // Emit the two casts in separate statements. Doing both inside the call's
+  // argument list would leave their relative order (and therefore the order
+  // and naming of the inserted bitcasts) up to the host compiler.
+  Dst = CastToCStr(Dst, B);
+  Src = CastToCStr(Src, B);
+  CallInst *CI = B.CreateCall2(StrCpy, Dst, Src, "strcpy");
+  // When the callee resolves to a Function, copy its calling convention onto
+  // the call so the two agree.
+  if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+  return CI;
+}
+
+/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
+/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
+                        unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+  // TD is not consulted here; the intrinsic is overloaded on Len's own type.
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  const Type *Ty = Len->getType();
+  Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
+  // Cast in separate statements so the destination cast is always emitted
+  // before the source cast.
+  Dst = CastToCStr(Dst, B);
+  Src = CastToCStr(Src, B);
+  // The fourth operand is the alignment, passed as an i32 constant.
+  return B.CreateCall4(MemCpy, Dst, Src, Len,
+                       ConstantInt::get(B.getInt32Ty(), Align));
+}
+
+/// EmitMemMove - Emit a call to the memmove function to the builder. This
+/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
+/// Returns the emitted intrinsic call.
+Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len,
+                         unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // Select the intrinsic overload from the length operand that is actually
+  // passed, exactly as EmitMemCpy and EmitMemSet do. This keeps the
+  // declaration and the call in agreement and avoids dereferencing TD, which
+  // this function has no other need for.
+  const Type *Ty = Len->getType();
+  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+  Dst = CastToCStr(Dst, B);
+  Src = CastToCStr(Src, B);
+  // The fourth operand is the alignment, passed as an i32 constant.
+  Value *A = ConstantInt::get(B.getInt32Ty(), Align);
+  return B.CreateCall4(MemMove, Dst, Src, Len, A);
+}
+
+/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
+                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // memchr is declared readonly and nounwind.
+  AttributeWithIndex AWI;
+  AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  // Prototype requested: i8* memchr(i8*, i32, intptr_t).
+  Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1),
+                                         B.getInt8PtrTy(),
+                                         B.getInt8PtrTy(),
+                                         B.getInt32Ty(),
+                                         TD->getIntPtrType(Context),
+                                         NULL);
+  CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+  // When the callee resolves to a Function, copy its calling convention onto
+  // the call so the two agree.
+  if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+
+  return CI;
+}
+
+/// EmitMemCmp - Emit a call to the memcmp function. Ptr1 and Ptr2 may be any
+/// pointer type (both are cast to i8*); Len is passed through unchanged and
+/// the declared length parameter has intptr_t type. Returns the i32 call.
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
+                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // Both pointer arguments are nocapture; memcmp is readonly and nounwind.
+  AttributeWithIndex AWI[3];
+  AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+  AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+  AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+                                   Attribute::NoUnwind);
+
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3),
+                                         B.getInt32Ty(),
+                                         B.getInt8PtrTy(),
+                                         B.getInt8PtrTy(),
+                                         TD->getIntPtrType(Context), NULL);
+  // Emit the two casts in separate statements. Doing both inside the call's
+  // argument list would leave their relative order (and therefore the order
+  // and naming of the inserted bitcasts) up to the host compiler.
+  Ptr1 = CastToCStr(Ptr1, B);
+  Ptr2 = CastToCStr(Ptr2, B);
+  CallInst *CI = B.CreateCall3(MemCmp, Ptr1, Ptr2, Len, "memcmp");
+
+  if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+
+  return CI;
+}
+
+/// EmitMemSet - Emit a call to the memset function. The intrinsic is
+/// overloaded on the type of Len and is always emitted with alignment 1.
+Value *llvm::EmitMemSet(Value *Dst, Value *Val,
+                        Value *Len, IRBuilder<> &B, const TargetData *TD) {
+  // TD is not consulted here.
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  Intrinsic::ID IID = Intrinsic::memset;
+  const Type *Tys[1];
+  Tys[0] = Len->getType();
+  Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1);
+  Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
+  return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);
+}
+
+/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
+/// 'floor'). This function is known to take a single argument of type matching
+/// 'Op' and returns one value with the same type. If 'Op' is a float, we add
+/// an 'f' suffix to the name; for any other non-double type (e.g. long double)
+/// an 'l' suffix is added. 'Attrs' is applied to the emitted call.
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
+                                  IRBuilder<> &B, const AttrListPtr &Attrs) {
+  char NameBuffer[20];
+  if (!Op->getType()->isDoubleTy()) {
+    // If we need to add a suffix, copy into NameBuffer.
+    unsigned NameLen = strlen(Name);
+    // Leave room for the one-character suffix and the terminating NUL.
+    assert(NameLen < sizeof(NameBuffer)-2);
+    memcpy(NameBuffer, Name, NameLen);
+    if (Op->getType()->isFloatTy())
+      NameBuffer[NameLen] = 'f';  // floorf
+    else
+      NameBuffer[NameLen] = 'l';  // floorl
+    NameBuffer[NameLen+1] = 0;
+    Name = NameBuffer;
+  }
+
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  // Prototype requested: T Name(T), where T is Op's type.
+  Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+                                         Op->getType(), NULL);
+  CallInst *CI = B.CreateCall(Callee, Op, Name);
+  CI->setAttributes(Attrs);
+  if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+
+  return CI;
+}
+
+/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+/// is an integer.
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+  // TD is not consulted here; putchar's prototype is i32 (i32).
+  Module *TheModule = B.GetInsertBlock()->getParent()->getParent();
+  const Type *I32Ty = B.getInt32Ty();
+  Value *Callee = TheModule->getOrInsertFunction("putchar", I32Ty, I32Ty, NULL);
+
+  // Bring the character to i32 with a signed integer cast.
+  Value *CharAsInt = B.CreateIntCast(Char, I32Ty, /*isSigned*/true, "chari");
+  CallInst *Call = B.CreateCall(Callee, CharAsInt, "putchar");
+
+  // When the callee resolves to a Function, copy its calling convention onto
+  // the call so the two agree.
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+  return Call;
+}
+
+/// EmitPutS - Emit a call to the puts function. Str must be a pointer of some
+/// kind; it is cast to i8* before the call. The call's result is discarded.
+void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+  // TD is not consulted here; puts's prototype is i32 (i8*).
+  Module *TheModule = B.GetInsertBlock()->getParent()->getParent();
+  // The string argument is nocapture and puts itself is nounwind.
+  AttributeWithIndex Attrs[] = {
+    AttributeWithIndex::get(1, Attribute::NoCapture),
+    AttributeWithIndex::get(~0u, Attribute::NoUnwind)
+  };
+
+  Value *Callee = TheModule->getOrInsertFunction("puts",
+                                                 AttrListPtr::get(Attrs, 2),
+                                                 B.getInt32Ty(),
+                                                 B.getInt8PtrTy(),
+                                                 NULL);
+  Value *CStr = CastToCStr(Str, B);
+  CallInst *Call = B.CreateCall(Callee, CStr, "puts");
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+/// an integer and File is a pointer to FILE.
+void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+                     const TargetData *TD) {
+  // TD is not consulted here.
+  Module *TheModule = B.GetInsertBlock()->getParent()->getParent();
+  const Type *I32Ty = B.getInt32Ty();
+  const Type *FileTy = File->getType();
+  // The FILE argument (parameter 2) is nocapture; fputc is nounwind.
+  AttributeWithIndex Attrs[] = {
+    AttributeWithIndex::get(2, Attribute::NoCapture),
+    AttributeWithIndex::get(~0u, Attribute::NoUnwind)
+  };
+
+  // The attribute list is only attached when File actually has pointer type;
+  // otherwise a plain prototype is requested.
+  Constant *Callee =
+    FileTy->isPointerTy()
+      ? TheModule->getOrInsertFunction("fputc", AttrListPtr::get(Attrs, 2),
+                                       I32Ty, I32Ty, FileTy, NULL)
+      : TheModule->getOrInsertFunction("fputc", I32Ty, I32Ty, FileTy, NULL);
+
+  // Bring the character to i32 with a signed integer cast.
+  Value *CharAsInt = B.CreateIntCast(Char, I32Ty, /*isSigned*/true, "chari");
+  CallInst *Call = B.CreateCall2(Callee, CharAsInt, File, "fputc");
+
+  // When the callee resolves to a Function, copy its calling convention onto
+  // the call so the two agree.
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+/// pointer (it is cast to i8*) and File is a pointer to FILE.
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+                     const TargetData *TD) {
+  // TD is not consulted here.
+  Module *TheModule = B.GetInsertBlock()->getParent()->getParent();
+  const Type *I32Ty = B.getInt32Ty();
+  const Type *I8PtrTy = B.getInt8PtrTy();
+  const Type *FileTy = File->getType();
+  // Both the string and the FILE argument are nocapture; fputs is nounwind.
+  AttributeWithIndex Attrs[] = {
+    AttributeWithIndex::get(1, Attribute::NoCapture),
+    AttributeWithIndex::get(2, Attribute::NoCapture),
+    AttributeWithIndex::get(~0u, Attribute::NoUnwind)
+  };
+
+  // The attribute list is only attached when File actually has pointer type;
+  // otherwise a plain prototype is requested.
+  Constant *Callee =
+    FileTy->isPointerTy()
+      ? TheModule->getOrInsertFunction("fputs", AttrListPtr::get(Attrs, 3),
+                                       I32Ty, I8PtrTy, FileTy, NULL)
+      : TheModule->getOrInsertFunction("fputs", I32Ty, I8PtrTy, FileTy, NULL);
+
+  Value *CStr = CastToCStr(Str, B);
+  CallInst *Call = B.CreateCall2(Callee, CStr, File, "fputs");
+
+  if (const Function *Fn = dyn_cast<Function>(Callee->stripPointerCasts()))
+    Call->setCallingConv(Fn->getCallingConv());
+}
+
+/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(Context), 1), File); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 93577b4..dec227a 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BasicInliner.cpp BreakCriticalEdges.cpp + BuildLibCalls.cpp CloneFunction.cpp CloneLoop.cpp CloneModule.cpp diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 194a6d4..549977c 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -1818,7 +1818,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // Handle some degenerate cases first if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) - return UndefValue::get(ResultTy); + return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); // No compile-time operations on this type yet. 
if (C1->getType()->isPPC_FP128Ty()) @@ -2070,7 +2070,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) { Constant *CE2Op0 = CE2->getOperand(0); if (CE2->getOpcode() == Instruction::BitCast && - CE2->getType()->isVectorTy()==CE2Op0->getType()->isVectorTy()) { + CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) { Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType()); return ConstantExpr::getICmp(pred, Inverse, CE2Op0); } @@ -2078,8 +2078,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If the left hand side is an extension, try eliminating it. if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { - if (CE1->getOpcode() == Instruction::SExt || - CE1->getOpcode() == Instruction::ZExt) { + if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) || + (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){ Constant *CE1Op0 = CE1->getOperand(0); Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType()); if (CE1Inverse == CE1Op0) { @@ -2097,27 +2097,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If C2 is a constant expr and C1 isn't, flip them around and fold the // other way if possible. // Also, if C1 is null and C2 isn't, flip them around. - switch (pred) { - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: - // No change of predicate required. - return ConstantExpr::getICmp(pred, C2, C1); - - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - // Change the predicate as necessary to swap the operands. - pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); - return ConstantExpr::getICmp(pred, C2, C1); - - default: // These predicates cannot be flopped around. 
- break; - } + pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); + return ConstantExpr::getICmp(pred, C2, C1); } } return 0; diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 85bbe4a..9887f28 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -246,6 +246,11 @@ public: MDNode *N = &(*MDNodeSet.begin()); N->destroy(); } + // Destroy MDStrings. + for (StringMap<MDString*>::iterator I = MDStringCache.begin(), + E = MDStringCache.end(); I != E; ++I) { + delete I->second; + } } }; diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index bc5e77d..4395ecf 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -30,5 +30,5 @@ $(GENFILE): $(ObjDir)/Intrinsics.gen.tmp changed significantly. ) install-local:: $(GENFILE) - $(Echo) Installing $(PROJ_includedir)/llvm/Intrinsics.gen - $(Verb) $(DataInstall) $(GENFILE) $(PROJ_includedir)/llvm/Intrinsics.gen + $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen + $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen |