Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
47 files changed, 5335 insertions, 4161 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp index 2e95926..4c9746b 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -102,6 +102,10 @@ FunctionPass *llvm::createAddDiscriminatorsPass() { return new AddDiscriminatorsLegacyPass(); } +static bool shouldHaveDiscriminator(const Instruction *I) { + return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I); +} + /// \brief Assign DWARF discriminators. /// /// To assign discriminators, we examine the boundaries of every @@ -176,7 +180,13 @@ static bool addDiscriminators(Function &F) { // discriminator for this instruction. for (BasicBlock &B : F) { for (auto &I : B.getInstList()) { - if (isa<IntrinsicInst>(&I)) + // Not all intrinsic calls should have a discriminator. + // We want to avoid a non-deterministic assignment of discriminators at + // different debug levels. We still allow discriminators on memory + // intrinsic calls because those can be early expanded by SROA into + // pairs of loads and stores, and the expanded load/store instructions + // should have a valid discriminator. + if (!shouldHaveDiscriminator(&I)) continue; const DILocation *DIL = I.getDebugLoc(); if (!DIL) @@ -190,8 +200,8 @@ static bool addDiscriminators(Function &F) { // discriminator is needed to distinguish both instructions. // Only the lowest 7 bits are used to represent a discriminator to fit // it in 1 byte ULEB128 representation. - unsigned Discriminator = (R.second ? ++LDM[L] : LDM[L]) & 0x7f; - I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator)); + unsigned Discriminator = R.second ? ++LDM[L] : LDM[L]; + I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator)); DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" << DIL->getColumn() << ":" << Discriminator << " " << I << "\n"); @@ -207,6 +217,10 @@ static bool addDiscriminators(Function &F) { LocationSet CallLocations; for (auto &I : B.getInstList()) { CallInst *Current = dyn_cast<CallInst>(&I); + // We bypass intrinsic calls for the following two reasons: + // 1) We want to avoid a non-deterministic assigment of + // discriminators. + // 2) We want to minimize the number of base discriminators used. if (!Current || isa<IntrinsicInst>(&I)) continue; @@ -216,8 +230,8 @@ static bool addDiscriminators(Function &F) { Location L = std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine()); if (!CallLocations.insert(L).second) { - Current->setDebugLoc( - CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f)); + unsigned Discriminator = ++LDM[L]; + Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator)); Changed = true; } } diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index b90349d..3d5cbfc 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted - // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. - SmallVector<WeakVH, 8> PHIs; + // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. 
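Stepping back to the AddDiscriminators hunk above: a DWARF discriminator lets a sample profiler tell apart several instructions that share the same file:line location. A small illustration of what the pass produces (hypothetical source and debug locations, not text from this patch):

    // C++ source, two calls on the same line 7:
    //   foo(); bar();
    //
    // After AddDiscriminators, the calls carry distinct base discriminators:
    //   call void @_Z3foov(), !dbg !10   ; line 7, discriminator 0
    //   call void @_Z3barv(), !dbg !11   ; line 7, discriminator 1

The patch routes the assignment through DILocation::setBaseDiscriminator() instead of cloneWithDiscriminator(D & 0x7f), and skips non-memory intrinsics so that the numbering does not shift with the debug level.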
+ SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); @@ -438,7 +438,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); - BI->setDebugLoc(BB->getFirstNonPHI()->getDebugLoc()); + BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); // Move the edges from Preds to point to NewBB instead of BB. for (unsigned i = 0, e = Preds.size(); i != e; ++i) { @@ -646,9 +646,10 @@ llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, } if (LI) { - Loop *L = LI->getLoopFor(Head); - L->addBasicBlockToLoop(ThenBlock, *LI); - L->addBasicBlockToLoop(Tail, *LI); + if (Loop *L = LI->getLoopFor(Head)) { + L->addBasicBlockToLoop(ThenBlock, *LI); + L->addBasicBlockToLoop(Tail, *LI); + } } return CheckTerm; diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index e61b04f..b60dfb4 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -58,7 +58,7 @@ static bool setOnlyReadsMemory(Function &F) { static bool setOnlyAccessesArgMemory(Function &F) { if (F.onlyAccessesArgMemory()) return false; - F.setOnlyAccessesArgMemory (); + F.setOnlyAccessesArgMemory(); ++NumArgMemOnly; return true; } @@ -71,632 +71,633 @@ static bool setDoesNotThrow(Function &F) { return true; } -static bool setDoesNotCapture(Function &F, unsigned n) { - if (F.doesNotCapture(n)) +static bool setRetDoesNotAlias(Function &F) { + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias)) return false; - F.setDoesNotCapture(n); - ++NumNoCapture; + F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + ++NumNoAlias; return true; } -static bool setOnlyReadsMemory(Function &F, unsigned n) { - if (F.onlyReadsMemory(n)) +static bool setDoesNotCapture(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoCapture)) return false; - F.setOnlyReadsMemory(n); - ++NumReadOnlyArg; + F.addParamAttr(ArgNo, Attribute::NoCapture); + ++NumNoCapture; return true; } -static bool setDoesNotAlias(Function &F, unsigned n) { - if (F.doesNotAlias(n)) +static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) return false; - F.setDoesNotAlias(n); - ++NumNoAlias; + F.addParamAttr(ArgNo, Attribute::ReadOnly); + ++NumReadOnlyArg; return true; } -static bool setNonNull(Function &F, unsigned n) { - assert((n != AttributeSet::ReturnIndex || - F.getReturnType()->isPointerTy()) && +static bool setRetNonNull(Function &F) { + assert(F.getReturnType()->isPointerTy() && "nonnull applies only to pointers"); - if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull)) return false; - F.addAttribute(n, Attribute::NonNull); + F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); ++NumNonNull; return true; } bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { - LibFunc::Func TheLibFunc; + LibFunc TheLibFunc; if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc))) return false; bool Changed = false; switch (TheLibFunc) { - case LibFunc::strlen: + case LibFunc_strlen: + case LibFunc_wcslen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= 
setOnlyAccessesArgMemory(F); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::strchr: - case LibFunc::strrchr: + case LibFunc_strchr: + case LibFunc_strrchr: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); return Changed; - case LibFunc::strtol: - case LibFunc::strtod: - case LibFunc::strtof: - case LibFunc::strtoul: - case LibFunc::strtoll: - case LibFunc::strtold: - case LibFunc::strtoull: + case LibFunc_strtol: + case LibFunc_strtod: + case LibFunc_strtof: + case LibFunc_strtoul: + case LibFunc_strtoll: + case LibFunc_strtold: + case LibFunc_strtoull: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::strcpy: - case LibFunc::stpcpy: - case LibFunc::strcat: - case LibFunc::strncat: - case LibFunc::strncpy: - case LibFunc::stpncpy: + case LibFunc_strcpy: + case LibFunc_stpcpy: + case LibFunc_strcat: + case LibFunc_strncat: + case LibFunc_strncpy: + case LibFunc_stpncpy: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::strxfrm: + case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::strcmp: // 0,1 - case LibFunc::strspn: // 0,1 - case LibFunc::strncmp: // 0,1 - case LibFunc::strcspn: // 0,1 - case LibFunc::strcoll: // 0,1 - case LibFunc::strcasecmp: // 0,1 - case LibFunc::strncasecmp: // + case LibFunc_strcmp: // 0,1 + case LibFunc_strspn: // 0,1 + case LibFunc_strncmp: // 0,1 + case LibFunc_strcspn: // 0,1 + case LibFunc_strcoll: // 0,1 + case LibFunc_strcasecmp: // 0,1 + case LibFunc_strncasecmp: // Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; - case LibFunc::strstr: - case LibFunc::strpbrk: + case LibFunc_strstr: + case LibFunc_strpbrk: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::strtok: - case LibFunc::strtok_r: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::scanf: + case LibFunc_strtok: + case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::setbuf: - case LibFunc::setvbuf: + case LibFunc_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + return Changed; + case LibFunc_setbuf: + case LibFunc_setvbuf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::strdup: - case LibFunc::strndup: + case LibFunc_strdup: + case LibFunc_strndup: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + return Changed; + case LibFunc_stat: + case LibFunc_statvfs: + Changed |= 
setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::stat: - case LibFunc::statvfs: + case LibFunc_sscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::sscanf: + case LibFunc_sprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::sprintf: + case LibFunc_snprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::snprintf: + case LibFunc_setitimer: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); - return Changed; - case LibFunc::setitimer: - Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 2); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::system: + case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::malloc: + case LibFunc_malloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; - case LibFunc::memcmp: + case LibFunc_memcmp: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; - case LibFunc::memchr: - case LibFunc::memrchr: + case LibFunc_memchr: + case LibFunc_memrchr: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); return Changed; - case LibFunc::modf: - case LibFunc::modff: - case LibFunc::modfl: + case LibFunc_modf: + case LibFunc_modff: + case LibFunc_modfl: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::memcpy: - case LibFunc::mempcpy: - case LibFunc::memccpy: - case LibFunc::memmove: + case LibFunc_memcpy: + case LibFunc_mempcpy: + case LibFunc_memccpy: + case LibFunc_memmove: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::memcpy_chk: + case LibFunc_memcpy_chk: Changed |= setDoesNotThrow(F); return Changed; - case LibFunc::memalign: - Changed |= setDoesNotAlias(F, 0); + case LibFunc_memalign: + Changed |= setRetDoesNotAlias(F); return Changed; - case LibFunc::mkdir: + case LibFunc_mkdir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::mktime: + case LibFunc_mktime: Changed |= setDoesNotThrow(F); - Changed |= 
setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::realloc: + case LibFunc_realloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::read: + case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::rewind: + case LibFunc_rewind: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::rmdir: - case LibFunc::remove: - case LibFunc::realpath: + case LibFunc_rmdir: + case LibFunc_remove: + case LibFunc_realpath: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::rename: + case LibFunc_rename: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::readlink: + case LibFunc_readlink: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::write: + case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::bcopy: + case LibFunc_bcopy: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::bcmp: + case LibFunc_bcmp: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; - case LibFunc::bzero: + case LibFunc_bzero: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::calloc: + case LibFunc_calloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; - case LibFunc::chmod: - case LibFunc::chown: + case LibFunc_chmod: + case LibFunc_chown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::ctermid: - case LibFunc::clearerr: - case LibFunc::closedir: + case LibFunc_ctermid: + case LibFunc_clearerr: + case LibFunc_closedir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::atoi: - case LibFunc::atol: - case LibFunc::atof: - case LibFunc::atoll: + case LibFunc_atoi: + case LibFunc_atol: + case LibFunc_atof: + case LibFunc_atoll: Changed |= setDoesNotThrow(F); Changed |= 
setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::access: + case LibFunc_access: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); + return Changed; + case LibFunc_fopen: + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::fopen: + case LibFunc_fdopen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::fdopen: + case LibFunc_feof: + case LibFunc_free: + case LibFunc_fseek: + case LibFunc_ftell: + case LibFunc_fgetc: + case LibFunc_fseeko: + case LibFunc_ftello: + case LibFunc_fileno: + case LibFunc_fflush: + case LibFunc_fclose: + case LibFunc_fsetpos: + case LibFunc_flockfile: + case LibFunc_funlockfile: + case LibFunc_ftrylockfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::feof: - case LibFunc::free: - case LibFunc::fseek: - case LibFunc::ftell: - case LibFunc::fgetc: - case LibFunc::fseeko: - case LibFunc::ftello: - case LibFunc::fileno: - case LibFunc::fflush: - case LibFunc::fclose: - case LibFunc::fsetpos: - case LibFunc::flockfile: - case LibFunc::funlockfile: - case LibFunc::ftrylockfile: + case LibFunc_ferror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F); return Changed; - case LibFunc::ferror: + case LibFunc_fputc: + case LibFunc_fstat: + case LibFunc_frexp: + case LibFunc_frexpf: + case LibFunc_frexpl: + case LibFunc_fstatvfs: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F); return Changed; - case LibFunc::fputc: - case LibFunc::fstat: - case LibFunc::frexp: - case LibFunc::frexpf: - case LibFunc::frexpl: - case LibFunc::fstatvfs: + case LibFunc_fgets: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 2); return Changed; - case LibFunc::fgets: + case LibFunc_fread: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); return Changed; - case LibFunc::fread: + case LibFunc_fwrite: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); + // FIXME: readonly #1? return Changed; - case LibFunc::fwrite: + case LibFunc_fputs: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); - // FIXME: readonly #1? 
+ Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::fputs: + case LibFunc_fscanf: + case LibFunc_fprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::fscanf: - case LibFunc::fprintf: + case LibFunc_fgetpos: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::fgetpos: + case LibFunc_getc: + case LibFunc_getlogin_r: + case LibFunc_getc_unlocked: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::getc: - case LibFunc::getlogin_r: - case LibFunc::getc_unlocked: + case LibFunc_getenv: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::getenv: + case LibFunc_gets: + case LibFunc_getchar: Changed |= setDoesNotThrow(F); - Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::gets: - case LibFunc::getchar: + case LibFunc_getitimer: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::getitimer: + case LibFunc_getpwnam: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::getpwnam: + case LibFunc_ungetc: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::ungetc: + case LibFunc_uname: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::uname: + case LibFunc_unlink: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::unlink: + case LibFunc_unsetenv: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::unsetenv: + case LibFunc_utime: + case LibFunc_utimes: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::utime: - case LibFunc::utimes: + case LibFunc_putc: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::putc: + case LibFunc_puts: + case LibFunc_printf: + case LibFunc_perror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::puts: - case LibFunc::printf: - case LibFunc::perror: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::pread: + case LibFunc_pread: // May throw; "pread" is a valid pthread 
cancellation point. - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::pwrite: + case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::putchar: + case LibFunc_putchar: Changed |= setDoesNotThrow(F); return Changed; - case LibFunc::popen: + case LibFunc_popen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::pclose: + case LibFunc_pclose: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); + return Changed; + case LibFunc_vscanf: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::vscanf: + case LibFunc_vsscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::vsscanf: + case LibFunc_vfscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::vfscanf: + case LibFunc_valloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setRetDoesNotAlias(F); return Changed; - case LibFunc::valloc: + case LibFunc_vprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::vprintf: + case LibFunc_vfprintf: + case LibFunc_vsprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::vfprintf: - case LibFunc::vsprintf: + case LibFunc_vsnprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::vsnprintf: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); - return Changed; - case LibFunc::open: + case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. 
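A note on the pervasive index shifts in this switch (setDoesNotCapture(F, 2) becoming setDoesNotCapture(F, 1), and so on): with the move from AttributeSet to AttributeList, the helpers now take a zero-based parameter number through hasParamAttribute()/addParamAttr(), whereas the old interface numbered slots with the return value at 0 and the first parameter at 1. For strcpy(char *dst, const char *src), for instance, src moves from slot 2 to ArgNo 1. A minimal sketch of the new convention (markStrCpySrc is a hypothetical name; the API calls are the ones used in the patch):

    #include "llvm/IR/Function.h"
    using namespace llvm;

    static void markStrCpySrc(Function &F) {
      // Zero-based parameter indices; the return value is addressed
      // separately via AttributeList::ReturnIndex, not an argument slot.
      F.addParamAttr(1, Attribute::NoCapture); // second parameter: src
      F.addParamAttr(1, Attribute::ReadOnly);  // src is only read
    }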
- Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::opendir: + case LibFunc_opendir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::tmpfile: + case LibFunc_tmpfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; - case LibFunc::times: + case LibFunc_times: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::htonl: - case LibFunc::htons: - case LibFunc::ntohl: - case LibFunc::ntohs: + case LibFunc_htonl: + case LibFunc_htons: + case LibFunc_ntohl: + case LibFunc_ntohs: Changed |= setDoesNotThrow(F); Changed |= setDoesNotAccessMemory(F); return Changed; - case LibFunc::lstat: + case LibFunc_lstat: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::lchown: + case LibFunc_lchown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::qsort: + case LibFunc_qsort: // May throw; places call through function pointer. - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 3); + return Changed; + case LibFunc_dunder_strdup: + case LibFunc_dunder_strndup: + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::dunder_strdup: - case LibFunc::dunder_strndup: + case LibFunc_dunder_strtok_r: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc::dunder_strtok_r: + case LibFunc_under_IO_getc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::under_IO_getc: + case LibFunc_under_IO_putc: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::under_IO_putc: - Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - return Changed; - case LibFunc::dunder_isoc99_scanf: + case LibFunc_dunder_isoc99_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::stat64: - case LibFunc::lstat64: - case LibFunc::statvfs64: + case LibFunc_stat64: + case LibFunc_lstat64: + case LibFunc_statvfs64: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::dunder_isoc99_sscanf: + case LibFunc_dunder_isoc99_sscanf: Changed |= setDoesNotThrow(F); + Changed 
|= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::fopen64: + case LibFunc_fopen64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; - case LibFunc::fseeko64: - case LibFunc::ftello64: + case LibFunc_fseeko64: + case LibFunc_ftello64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; - case LibFunc::tmpfile64: + case LibFunc_tmpfile64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; - case LibFunc::fstat64: - case LibFunc::fstatvfs64: + case LibFunc_fstat64: + case LibFunc_fstatvfs64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; - case LibFunc::open64: + case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; - case LibFunc::gettimeofday: + case LibFunc_gettimeofday: // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's // arguments. Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; - case LibFunc::Znwj: // new(unsigned int) - case LibFunc::Znwm: // new(unsigned long) - case LibFunc::Znaj: // new[](unsigned int) - case LibFunc::Znam: // new[](unsigned long) - case LibFunc::msvc_new_int: // new(unsigned int) - case LibFunc::msvc_new_longlong: // new(unsigned long long) - case LibFunc::msvc_new_array_int: // new[](unsigned int) - case LibFunc::msvc_new_array_longlong: // new[](unsigned long long) + case LibFunc_Znwj: // new(unsigned int) + case LibFunc_Znwm: // new(unsigned long) + case LibFunc_Znaj: // new[](unsigned int) + case LibFunc_Znam: // new[](unsigned long) + case LibFunc_msvc_new_int: // new(unsigned int) + case LibFunc_msvc_new_longlong: // new(unsigned long long) + case LibFunc_msvc_new_array_int: // new[](unsigned int) + case LibFunc_msvc_new_array_longlong: // new[](unsigned long long) // Operator new always returns a nonnull noalias pointer - Changed |= setNonNull(F, AttributeSet::ReturnIndex); - Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex); + Changed |= setRetNonNull(F); + Changed |= setRetDoesNotAlias(F); return Changed; //TODO: add LibFunc entries for: - //case LibFunc::memset_pattern4: - //case LibFunc::memset_pattern8: - case LibFunc::memset_pattern16: + //case LibFunc_memset_pattern4: + //case LibFunc_memset_pattern8: + case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; // int __nvvm_reflect(const char *) - case LibFunc::nvvm_reflect: + case 
LibFunc_nvvm_reflect: Changed |= setDoesNotAccessMemory(F); Changed |= setDoesNotThrow(F); return Changed; @@ -717,13 +718,13 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) { Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::strlen)) + if (!TLI->has(LibFunc_strlen)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context), - B.getInt8PtrTy(), nullptr); + B.getInt8PtrTy()); inferLibFuncAttributes(*M->getFunction("strlen"), *TLI); CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) @@ -734,14 +735,14 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::strchr)) + if (!TLI->has(LibFunc_strchr)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); Constant *StrChr = - M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty, nullptr); + M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty); inferLibFuncAttributes(*M->getFunction("strchr"), *TLI); CallInst *CI = B.CreateCall( StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr"); @@ -752,14 +753,14 @@ Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B, Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::strncmp)) + if (!TLI->has(LibFunc_strncmp)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *StrNCmp = M->getOrInsertFunction("strncmp", B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), - DL.getIntPtrType(Context), nullptr); + DL.getIntPtrType(Context)); inferLibFuncAttributes(*M->getFunction("strncmp"), *TLI); CallInst *CI = B.CreateCall( StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "strncmp"); @@ -772,12 +773,12 @@ Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, const TargetLibraryInfo *TLI, StringRef Name) { - if (!TLI->has(LibFunc::strcpy)) + if (!TLI->has(LibFunc_strcpy)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, nullptr); + Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr); inferLibFuncAttributes(*M->getFunction(Name), *TLI); CallInst *CI = B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name); @@ -788,13 +789,13 @@ Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const TargetLibraryInfo *TLI, StringRef Name) { - if (!TLI->has(LibFunc::strncpy)) + if (!TLI->has(LibFunc_strncpy)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); Type *I8Ptr = B.getInt8PtrTy(); Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, - Len->getType(), nullptr); + Len->getType()); inferLibFuncAttributes(*M->getFunction(Name), *TLI); CallInst *CI = B.CreateCall( StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, "strncpy"); @@ -806,18 +807,18 @@ Value *llvm::emitStrNCpy(Value 
*Dst, Value *Src, Value *Len, IRBuilder<> &B, Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::memcpy_chk)) + if (!TLI->has(LibFunc_memcpy_chk)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); - AttributeSet AS; - AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeList AS; + AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex, + Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction( - "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), - DL.getIntPtrType(Context), nullptr); + DL.getIntPtrType(Context)); Dst = castToCStr(Dst, B); Src = castToCStr(Src, B); CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize}); @@ -828,14 +829,14 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::memchr)) + if (!TLI->has(LibFunc_memchr)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemChr = M->getOrInsertFunction("memchr", B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), - DL.getIntPtrType(Context), nullptr); + DL.getIntPtrType(Context)); inferLibFuncAttributes(*M->getFunction("memchr"), *TLI); CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, "memchr"); @@ -847,14 +848,14 @@ Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::memcmp)) + if (!TLI->has(LibFunc_memcmp)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCmp = M->getOrInsertFunction("memcmp", B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), - DL.getIntPtrType(Context), nullptr); + DL.getIntPtrType(Context)); inferLibFuncAttributes(*M->getFunction("memcmp"), *TLI); CallInst *CI = B.CreateCall( MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "memcmp"); @@ -881,15 +882,21 @@ static void appendTypeSuffix(Value *Op, StringRef &Name, } Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, - const AttributeSet &Attrs) { + const AttributeList &Attrs) { SmallString<20> NameBuffer; appendTypeSuffix(Op, Name, NameBuffer); Module *M = B.GetInsertBlock()->getModule(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), nullptr); + Op->getType()); CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. 
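For context on why Speculatable must be stripped in the hunk that follows: the incoming attribute set typically originates from a math intrinsic (for example llvm.sqrt.f64, which is marked speculatable and may be hoisted past control flow), but the libm replacement can set errno and therefore must not be executed speculatively. A hypothetical before/after, illustrating the hazard rather than quoting the patch:

    // Before lowering: safe to hoist out of a guarded block.
    //   %r = call double @llvm.sqrt.f64(double %x)   ; speculatable
    // After lowering to a library call: sqrt(-1.0) sets errno, so the
    // Speculatable function attribute must not survive the replacement.
    //   %r = call double @sqrt(double %x)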
+ CI->setAttributes(Attrs.removeAttribute(B.getContext(), + AttributeList::FunctionIndex, + Attribute::Speculatable)); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -897,13 +904,13 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, } Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, - IRBuilder<> &B, const AttributeSet &Attrs) { + IRBuilder<> &B, const AttributeList &Attrs) { SmallString<20> NameBuffer; appendTypeSuffix(Op1, Name, NameBuffer); Module *M = B.GetInsertBlock()->getModule(); Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(), - Op2->getType(), nullptr); + Op2->getType()); CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name); CI->setAttributes(Attrs); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) @@ -914,12 +921,12 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::putchar)) + if (!TLI->has(LibFunc_putchar)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); - Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), - B.getInt32Ty(), nullptr); + Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty()); + inferLibFuncAttributes(*M->getFunction("putchar"), *TLI); CallInst *CI = B.CreateCall(PutChar, B.CreateIntCast(Char, B.getInt32Ty(), @@ -934,12 +941,12 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B, Value *llvm::emitPutS(Value *Str, IRBuilder<> &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::puts)) + if (!TLI->has(LibFunc_puts)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); Value *PutS = - M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy(), nullptr); + M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy()); inferLibFuncAttributes(*M->getFunction("puts"), *TLI); CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) @@ -949,12 +956,12 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B, Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::fputc)) + if (!TLI->has(LibFunc_fputc)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); Constant *F = M->getOrInsertFunction("fputc", B.getInt32Ty(), B.getInt32Ty(), - File->getType(), nullptr); + File->getType()); if (File->getType()->isPointerTy()) inferLibFuncAttributes(*M->getFunction("fputc"), *TLI); Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, @@ -968,13 +975,13 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B, Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::fputs)) + if (!TLI->has(LibFunc_fputs)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); - StringRef FPutsName = TLI->getName(LibFunc::fputs); + StringRef FPutsName = TLI->getName(LibFunc_fputs); Constant *F = M->getOrInsertFunction( - FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), nullptr); + FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType()); if (File->getType()->isPointerTy()) inferLibFuncAttributes(*M->getFunction(FPutsName), *TLI); CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs"); @@ -986,16 +993,16 @@ Value *llvm::emitFPutS(Value *Str, Value 
*File, IRBuilder<> &B, Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::fwrite)) + if (!TLI->has(LibFunc_fwrite)) return nullptr; Module *M = B.GetInsertBlock()->getModule(); LLVMContext &Context = B.GetInsertBlock()->getContext(); - StringRef FWriteName = TLI->getName(LibFunc::fwrite); + StringRef FWriteName = TLI->getName(LibFunc_fwrite); Constant *F = M->getOrInsertFunction( FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType(), - nullptr); + DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); + if (File->getType()->isPointerTy()) inferLibFuncAttributes(*M->getFunction(FWriteName), *TLI); CallInst *CI = diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index bc2cef2..83ec7f5 100644 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -17,9 +17,12 @@ #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -36,12 +39,21 @@ namespace { : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} }; - struct DivPhiNodes { - PHINode *Quotient; - PHINode *Remainder; + struct QuotRemPair { + Value *Quotient; + Value *Remainder; - DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) - : Quotient(InQuotient), Remainder(InRemainder) {} + QuotRemPair(Value *InQuotient, Value *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; + + /// A quotient and remainder, plus a BB from which they logically "originate". + /// If you use Quotient or Remainder in a Phi node, you should use BB as its + /// corresponding predecessor. + struct QuotRemWithBB { + BasicBlock *BB = nullptr; + Value *Quotient = nullptr; + Value *Remainder = nullptr; }; } @@ -69,159 +81,376 @@ namespace llvm { } }; - typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; + typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy; + typedef DenseMap<unsigned, unsigned> BypassWidthsTy; + typedef SmallPtrSet<Instruction *, 4> VisitedSetTy; } -// insertFastDiv - Substitutes the div/rem instruction with code that checks the -// value of the operands and uses a shorter-faster div/rem instruction when -// possible and the longer-slower div/rem instruction otherwise. -static bool insertFastDiv(Instruction *I, IntegerType *BypassType, - bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) { - Function *F = I->getParent()->getParent(); - // Get instruction operands - Value *Dividend = I->getOperand(0); - Value *Divisor = I->getOperand(1); +namespace { +enum ValueRange { + /// Operand definitely fits into BypassType. No runtime checks are needed. + VALRNG_KNOWN_SHORT, + /// A runtime check is required, as value range is unknown. + VALRNG_UNKNOWN, + /// Operand is unlikely to fit into BypassType. The bypassing should be + /// disabled. 
+ VALRNG_LIKELY_LONG +}; + +class FastDivInsertionTask { + bool IsValidTask = false; + Instruction *SlowDivOrRem = nullptr; + IntegerType *BypassType = nullptr; + BasicBlock *MainBB = nullptr; + + bool isHashLikeValue(Value *V, VisitedSetTy &Visited); + ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); + QuotRemWithBB createSlowBB(BasicBlock *Successor); + QuotRemWithBB createFastBB(BasicBlock *Successor); + QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, + BasicBlock *PhiBB); + Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); + Optional<QuotRemPair> insertFastDivAndRem(); + + bool isSignedOp() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::SRem; + } + bool isDivisionOp() { + return SlowDivOrRem->getOpcode() == Instruction::SDiv || + SlowDivOrRem->getOpcode() == Instruction::UDiv; + } + Type *getSlowType() { return SlowDivOrRem->getType(); } + +public: + FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); + Value *getReplacement(DivCacheTy &Cache); +}; +} // anonymous namespace + +FastDivInsertionTask::FastDivInsertionTask(Instruction *I, + const BypassWidthsTy &BypassWidths) { + switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + SlowDivOrRem = I; + break; + default: + // I is not a div/rem operation. + return; + } - if (isa<ConstantInt>(Divisor)) { - // Division by a constant should have been been solved and replaced earlier - // in the pipeline. - return false; + // Skip division on vector types. Only optimize integer instructions. + IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); + if (!SlowType) + return; + + // Skip if this bitwidth is not bypassed. + auto BI = BypassWidths.find(SlowType->getBitWidth()); + if (BI == BypassWidths.end()) + return; + + // Get type for div/rem instruction with bypass bitwidth. + IntegerType *BT = IntegerType::get(I->getContext(), BI->second); + BypassType = BT; + + // The original basic block. + MainBB = I->getParent(); + + // The instruction is indeed a slow div or rem operation. + IsValidTask = true; +} + +/// Reuses previously-computed dividend or remainder from the current BB if +/// operands and operation are identical. Otherwise calls insertFastDivAndRem to +/// perform the optimization and caches the resulting dividend and remainder. +/// If no replacement can be generated, nullptr is returned. +Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { + // First, make sure that the task is valid. + if (!IsValidTask) + return nullptr; + + // Then, look for a value in Cache. + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + DivOpInfo Key(isSignedOp(), Dividend, Divisor); + auto CacheI = Cache.find(Key); + + if (CacheI == Cache.end()) { + // If previous instance does not exist, try to insert fast div. + Optional<QuotRemPair> OptResult = insertFastDivAndRem(); + // Bail out if insertFastDivAndRem has failed. + if (!OptResult) + return nullptr; + CacheI = Cache.insert({Key, *OptResult}).first; } - // If the numerator is a constant, bail if it doesn't fit into BypassType. - if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend)) - if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) + QuotRemPair &Value = CacheI->second; + return isDivisionOp() ? Value.Quotient : Value.Remainder; +} + +/// \brief Check if a value looks like a hash. 
+/// +/// The routine is expected to detect values computed using the most common hash +/// algorithms. Typically, hash computations end with one of the following +/// instructions: +/// +/// 1) MUL with a constant wider than BypassType +/// 2) XOR instruction +/// +/// And even if we are wrong and the value is not a hash, it is still quite +/// unlikely that such values will fit into BypassType. +/// +/// To detect string hash algorithms like FNV we have to look through PHI-nodes. +/// It is implemented as a depth-first search for values that look neither long +/// nor hash-like. +bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + switch (I->getOpcode()) { + case Instruction::Xor: + return true; + case Instruction::Mul: { + // After Constant Hoisting pass, long constants may be represented as + // bitcast instructions. As a result, some constants may look like an + // instruction at first, and an additional check is necessary to find out if + // an operand is actually a constant. + Value *Op1 = I->getOperand(1); + ConstantInt *C = dyn_cast<ConstantInt>(Op1); + if (!C && isa<BitCastInst>(Op1)) + C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0)); + return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); + } + case Instruction::PHI: { + // Stop IR traversal in case of a crazy input code. This limits recursion + // depth. + if (Visited.size() >= 16) return false; + // Do not visit nodes that have been visited already. We return true because + // it means that we couldn't find any value that doesn't look hash-like. + if (Visited.find(I) != Visited.end()) + return true; + Visited.insert(I); + return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) { + // Ignore undef values as they probably don't affect the division + // operands. + return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || + isa<UndefValue>(V); + }); + } + default: + return false; + } +} + +/// Check if an integer value fits into our bypass type. +ValueRange FastDivInsertionTask::getValueRange(Value *V, + VisitedSetTy &Visited) { + unsigned ShortLen = BypassType->getBitWidth(); + unsigned LongLen = V->getType()->getIntegerBitWidth(); + + assert(LongLen > ShortLen && "Value type must be wider than BypassType"); + unsigned HiBits = LongLen - ShortLen; + + const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); + KnownBits Known(LongLen); - // Basic Block is split before divide - BasicBlock *MainBB = &*I->getParent(); - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); - - // Add new basic block for slow divide operation - BasicBlock *SlowBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - SlowBB->moveBefore(SuccessorBB); - IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); - Value *SlowQuotientV; - Value *SlowRemainderV; - if (UseSignedOp) { - SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + computeKnownBits(V, Known, DL); + + if (Known.countMinLeadingZeros() >= HiBits) + return VALRNG_KNOWN_SHORT; + + if (Known.countMaxLeadingZeros() < HiBits) + return VALRNG_LIKELY_LONG; + + // Long integer divisions are often used in hashtable implementations. It's + // not worth bypassing such divisions because hash values are extremely + // unlikely to have enough leading zeros. The call below tries to detect + // values that are unlikely to fit BypassType (including hashes). 
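To make the heuristic concrete: string hashes in the FNV family interleave XORs with multiplications by a constant much wider than any realistic bypass type, so the high bits of the result are almost never all zero and a runtime bypass check would fail nearly every time. A self-contained example using the standard 64-bit FNV-1a constants (not code from this patch):

    #include <cstdint>

    // 64-bit FNV-1a: the XOR and the multiply by a >32-bit constant are
    // exactly the patterns isHashLikeValue() searches for.
    static uint64_t fnv1a(const char *S) {
      uint64_t H = 14695981039346656037ull; // offset basis
      for (; *S; ++S) {
        H ^= (unsigned char)*S;
        H *= 1099511628211ull;              // FNV prime (~2^40)
      }
      return H;
    }

    // A hashtable lookup like fnv1a(Key) % NumBuckets would virtually never
    // pass a 64-to-32-bit operand check, so the bypass would be pure overhead.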
+ if (isHashLikeValue(V, Visited)) + return VALRNG_LIKELY_LONG; + + return VALRNG_UNKNOWN; +} + +/// Add new basic block for slow div and rem operations and put it before +/// SuccessorBB. +QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { + QuotRemWithBB DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + + if (isSignedOp()) { + DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); } else { - SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); - SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); + DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); } - SlowBuilder.CreateBr(SuccessorBB); - - // Add new basic block for fast divide operation - BasicBlock *FastBB = - BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB); - FastBB->moveBefore(SlowBB); - IRBuilder<> FastBuilder(FastBB, FastBB->begin()); - Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, - BypassType); - Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, - BypassType); - - // udiv/urem because optimization only handles positive numbers - Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV); - Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, - ShortDivisorV); - Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, - ShortQuotientV, - Dividend->getType()); - Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, - ShortRemainderV, - Dividend->getType()); - FastBuilder.CreateBr(SuccessorBB); - - // Phi nodes for result of div and rem - IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); - PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - QuoPhi->addIncoming(SlowQuotientV, SlowBB); - QuoPhi->addIncoming(FastQuotientV, FastBB); - PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2); - RemPhi->addIncoming(SlowRemainderV, SlowBB); - RemPhi->addIncoming(FastRemainderV, FastBB); - - // Replace I with appropriate phi node - if (UseDivOp) - I->replaceAllUsesWith(QuoPhi); - else - I->replaceAllUsesWith(RemPhi); - I->eraseFromParent(); - // Combine operands into a single value with OR for value testing below - MainBB->getInstList().back().eraseFromParent(); - IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Builder.CreateBr(SuccessorBB); + return DivRemPair; +} + +/// Add new basic block for fast div and rem operations and put it before +/// SuccessorBB. +QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { + QuotRemWithBB DivRemPair; + DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", + MainBB->getParent(), SuccessorBB); + IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); + + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + Value *ShortDivisorV = + Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); + Value *ShortDividendV = + Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); + + // udiv/urem because this optimization only handles positive numbers. 
+ Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); + Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); + DivRemPair.Quotient = + Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); + DivRemPair.Remainder = + Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); + Builder.CreateBr(SuccessorBB); + + return DivRemPair; +} - // We should have bailed out above if the divisor is a constant, but the - // dividend may still be a constant. Set OrV to our non-constant operands - // OR'ed together. - assert(!isa<ConstantInt>(Divisor)); +/// Creates Phi nodes for result of Div and Rem. +QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, + QuotRemWithBB &RHS, + BasicBlock *PhiBB) { + IRBuilder<> Builder(PhiBB, PhiBB->begin()); + PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); + QuoPhi->addIncoming(LHS.Quotient, LHS.BB); + QuoPhi->addIncoming(RHS.Quotient, RHS.BB); + PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); + RemPhi->addIncoming(LHS.Remainder, LHS.BB); + RemPhi->addIncoming(RHS.Remainder, RHS.BB); + return QuotRemPair(QuoPhi, RemPhi); +} + +/// Creates a runtime check to test whether both the divisor and dividend fit +/// into BypassType. The check is inserted at the end of MainBB. True return +/// value means that the operands fit. Either of the operands may be NULL if it +/// doesn't need a runtime check. +Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { + assert((Op1 || Op2) && "Nothing to check"); + IRBuilder<> Builder(MainBB, MainBB->end()); Value *OrV; - if (!isa<ConstantInt>(Dividend)) - OrV = MainBuilder.CreateOr(Dividend, Divisor); + if (Op1 && Op2) + OrV = Builder.CreateOr(Op1, Op2); else - OrV = Divisor; + OrV = Op1 ? Op1 : Op2; // BitMask is inverted to check if the operands are // larger than the bypass type uint64_t BitMask = ~BypassType->getBitMask(); - Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); - - // Compare operand values and branch - Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); - Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); - MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); - - // Cache phi nodes to be used later in place of other instances - // of div or rem with the same sign, dividend, and divisor - DivOpInfo Key(UseSignedOp, Dividend, Divisor); - DivPhiNodes Value(QuoPhi, RemPhi); - PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); - return true; + Value *AndV = Builder.CreateAnd(OrV, BitMask); + + // Compare operand values + Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); + return Builder.CreateICmpEQ(AndV, ZeroV); } -// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from -// the current BB if operands and operation are identical. Otherwise calls -// insertFastDiv to perform the optimization and caches the resulting dividend -// and remainder. -static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType, - bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) { - // Get instruction operands - DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1)); - DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); - - if (CacheI == PerBBDivCache.end()) { - // If previous instance does not exist, insert fast div - return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); +/// Substitutes the div/rem instruction with code that checks the value of the +/// operands and uses a shorter-faster div/rem instruction when possible. 
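Taken together, createFastBB(), createSlowBB(), the phi nodes, and the runtime check emitted by the routine below amount to the following source-level rewrite for a 64-bit unsigned division with a 32-bit BypassType. This is a sketch of the generated control flow, not code from the patch, and the helper name is illustrative:

#include <cstdint>

// General case: one OR + mask test selects between a cheap 32-bit divide
// (trunc/udiv/zext, the "fast BB") and the original full-width divide
// (the "slow BB").
uint64_t bypassedUDiv(uint64_t A, uint64_t B) {
  if (((A | B) & ~0xffffffffULL) == 0)
    return (uint64_t)((uint32_t)A / (uint32_t)B); // fast: both fit in i32
  return A / B;                                   // slow: full-width udiv
}

In-tree, CodeGenPrepare drives this transformation by feeding the target's getBypassSlowDivWidths() map into bypassSlowDivision(); some x86 subtargets publish a {64 -> 32} entry there.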
+Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { + Value *Dividend = SlowDivOrRem->getOperand(0); + Value *Divisor = SlowDivOrRem->getOperand(1); + + if (isa<ConstantInt>(Divisor)) { + // Keep division by a constant for DAGCombiner. + return None; } - // Replace operation value with previously generated phi node - DivPhiNodes &Value = CacheI->second; - if (UseDivOp) { - // Replace all uses of div instruction with quotient phi node - I->replaceAllUsesWith(Value.Quotient); + VisitedSetTy SetL; + ValueRange DividendRange = getValueRange(Dividend, SetL); + if (DividendRange == VALRNG_LIKELY_LONG) + return None; + + VisitedSetTy SetR; + ValueRange DivisorRange = getValueRange(Divisor, SetR); + if (DivisorRange == VALRNG_LIKELY_LONG) + return None; + + bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); + bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); + + if (DividendShort && DivisorShort) { + // If both operands are known to be short then just replace the long + // division with a short one in-place. + + IRBuilder<> Builder(SlowDivOrRem); + Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); + Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); + Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); + Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); + Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); + Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); + return QuotRemPair(ExtDiv, ExtRem); + } else if (DividendShort && !isSignedOp()) { + // If the division is unsigned and Dividend is known to be short, then + // either + // 1) Divisor is less or equal to Dividend, and the result can be computed + // with a short division. + // 2) Divisor is greater than Dividend. In this case, no division is needed + // at all: The quotient is 0 and the remainder is equal to Dividend. + // + // So instead of checking at runtime whether Divisor fits into BypassType, + // we emit a runtime check to differentiate between these two cases. This + // lets us entirely avoid a long div. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Long; + Long.BB = MainBB; + Long.Quotient = ConstantInt::get(getSlowType(), 0); + Long.Remainder = Dividend; + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); + IRBuilder<> Builder(MainBB, MainBB->end()); + Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); + Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); + return Result; } else { - // Replace all uses of rem instruction with remainder phi node - I->replaceAllUsesWith(Value.Remainder); + // General case. Create both slow and fast div/rem pairs and choose one of + // them at runtime. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemWithBB Slow = createSlowBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); + Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, + DivisorShort ? 
nullptr : Divisor); + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); + return Result; } - - // Remove redundant operation - I->eraseFromParent(); - return true; } -// bypassSlowDivision - This optimization identifies DIV instructions in a BB -// that can be profitably bypassed and carried out with a shorter, faster -// divide. -bool llvm::bypassSlowDivision( - BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) { - DivCacheTy DivCache; +/// This optimization identifies DIV/REM instructions in a BB that can be +/// profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(BasicBlock *BB, + const BypassWidthsTy &BypassWidths) { + DivCacheTy PerBBDivCache; bool MadeChange = false; Instruction* Next = &*BB->begin(); @@ -231,42 +460,20 @@ bool llvm::bypassSlowDivision( Instruction* I = Next; Next = Next->getNextNode(); - // Get instruction details - unsigned Opcode = I->getOpcode(); - bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; - bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; - bool UseSignedOp = Opcode == Instruction::SDiv || - Opcode == Instruction::SRem; - - // Only optimize div or rem ops - if (!UseDivOp && !UseRemOp) - continue; - - // Skip division on vector types, only optimize integer instructions - if (!I->getType()->isIntegerTy()) - continue; - - // Get bitwidth of div/rem instruction - IntegerType *T = cast<IntegerType>(I->getType()); - unsigned int bitwidth = T->getBitWidth(); - - // Continue if bitwidth is not bypassed - DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); - if (BI == BypassWidths.end()) - continue; - - // Get type for div/rem instruction with bypass bitwidth - IntegerType *BT = IntegerType::get(I->getContext(), BI->second); - - MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache); + FastDivInsertionTask Task(I, BypassWidths); + if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + MadeChange = true; + } } // Above we eagerly create divs and rems, as pairs, so that we can efficiently // create divrem machine instructions. Now erase any unused divs / rems so we // don't leave extra instructions sitting around. - for (auto &KV : DivCache) - for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder}) - RecursivelyDeleteTriviallyDeadInstructions(Phi); + for (auto &KV : PerBBDivCache) + for (Value *V : {KV.second.Quotient, KV.second.Remainder}) + RecursivelyDeleteTriviallyDeadInstructions(V); return MadeChange; } diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 4d33e22..9c4e139 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" @@ -31,24 +30,38 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <map> using namespace llvm; /// See comments in Cloning.h. 
-BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - ValueToValueMapTy &VMap, +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo) { + ClonedCodeInfo *CodeInfo, + DebugInfoFinder *DIFinder) { + DenseMap<const MDNode *, MDNode *> Cache; BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - + Module *TheModule = F ? F->getParent() : nullptr; + // Loop over all instructions, and copy them over. for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { + + if (DIFinder && TheModule) { + if (auto *DDI = dyn_cast<DbgDeclareInst>(II)) + DIFinder->processDeclare(*TheModule, DDI); + else if (auto *DVI = dyn_cast<DbgValueInst>(II)) + DIFinder->processValue(*TheModule, DVI); + + if (auto DbgLoc = II->getDebugLoc()) + DIFinder->processLocation(*TheModule, DbgLoc.get()); + } + Instruction *NewInst = II->clone(); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); @@ -90,9 +103,9 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(&I) && "No mapping from source argument specified!"); #endif - // Copy all attributes other than those stored in the AttributeSet. We need - // to remap the parameter indices of the AttributeSet. - AttributeSet NewAttrs = NewFunc->getAttributes(); + // Copy all attributes other than those stored in the AttributeList. We need + // to remap the parameter indices of the AttributeList. + AttributeList NewAttrs = NewFunc->getAttributes(); NewFunc->copyAttributesFrom(OldFunc); NewFunc->setAttributes(NewAttrs); @@ -103,31 +116,54 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer)); - AttributeSet OldAttrs = OldFunc->getAttributes(); + SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size()); + AttributeList OldAttrs = OldFunc->getAttributes(); + // Clone any argument attributes that are present in the VMap. - for (const Argument &OldArg : OldFunc->args()) + for (const Argument &OldArg : OldFunc->args()) { if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) { - AttributeSet attrs = - OldAttrs.getParamAttributes(OldArg.getArgNo() + 1); - if (attrs.getNumSlots() > 0) - NewArg->addAttr(attrs); + NewArgAttrs[NewArg->getArgNo()] = + OldAttrs.getParamAttributes(OldArg.getArgNo()); } + } NewFunc->setAttributes( - NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex, - OldAttrs.getRetAttributes()) - .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex, - OldAttrs.getFnAttributes())); + AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(), + OldAttrs.getRetAttributes(), NewArgAttrs)); + + bool MustCloneSP = + OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); + DISubprogram *SP = OldFunc->getSubprogram(); + if (SP) { + assert(!MustCloneSP || ModuleLevelChanges); + // Add mappings for some DebugInfo nodes that we don't want duplicated + // even if they're distinct. 
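The mappings set up next pin shared debug nodes to themselves, so later MapMetadata() calls reuse them instead of cloning them. Combined with the CloneFunction() change further down (ModuleLevelChanges becomes F->getSubprogram() != nullptr), a plain clone now gets its own DISubprogram while the compile unit, file, and types stay shared. From a caller's side the API is unchanged; a usage sketch, with an illustrative wrapper name:

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Clone F inside its own module. With this patch the clone receives its own
// subprogram rather than sharing F's distinct DISubprogram node.
static Function *cloneInModule(Function *F) {
  ValueToValueMapTy VMap; // filled with old->new mappings by the call
  return CloneFunction(F, VMap);
}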
+ auto &MD = VMap.MD(); + MD[SP->getUnit()].reset(SP->getUnit()); + MD[SP->getType()].reset(SP->getType()); + MD[SP->getFile()].reset(SP->getFile()); + // If we're not cloning into the same module, no need to clone the + // subprogram + if (!MustCloneSP) + MD[SP].reset(SP); + } SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; OldFunc->getAllMetadata(MDs); - for (auto MD : MDs) + for (auto MD : MDs) { NewFunc->addMetadata( MD.first, *MapMetadata(MD.second, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer)); + } + + // When we remap instructions, we want to avoid duplicating inlined + // DISubprograms, so record all subprograms we find as we duplicate + // instructions and then freeze them in the MD map. + // We also record information about dbg.value and dbg.declare to avoid + // duplicating the types. + DebugInfoFinder DIFinder; // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of @@ -138,7 +174,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, + SP ? &DIFinder : nullptr); // Add basic block mapping. VMap[&BB] = CBB; @@ -160,6 +197,16 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } + for (DISubprogram *ISP : DIFinder.subprograms()) { + if (ISP != SP) { + VMap.MD()[ISP].reset(ISP); + } + } + + for (auto *Type : DIFinder.types()) { + VMap.MD()[Type].reset(Type); + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = @@ -208,7 +255,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "", + CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", CodeInfo); return NewF; @@ -247,7 +294,7 @@ namespace { void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone){ - WeakVH &BBEntry = VMap[BB]; + WeakTrackingVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -294,12 +341,13 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. - if (Value *MappedV = VMap.lookup(V)) - V = MappedV; + if (NewFunc != OldFunc) + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; if (!NewInst->mayHaveSideEffects()) { VMap[&*II] = V; - delete NewInst; + NewInst->deleteValue(); continue; } } @@ -353,7 +401,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, Cond = dyn_cast_or_null<ConstantInt>(V); } if (Cond) { // Constant fold to uncond branch! 
- SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond); + SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond); BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); @@ -549,7 +597,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the - // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). @@ -747,3 +795,40 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, return NewLoop; } + +/// \brief Duplicate non-Phi instructions from the beginning of block up to +/// StopAt instruction into a split block between BB and its predecessor. +BasicBlock * +llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB, + Instruction *StopAt, + ValueToValueMapTy &ValueMapping) { + // We are going to have to map operands from the original BB block to the new + // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to + // account for entry from PredBB. + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + BasicBlock *NewBB = SplitEdge(PredBB, BB); + NewBB->setName(PredBB->getName() + ".split"); + Instruction *NewTerm = NewBB->getTerminator(); + + // Clone the non-phi instructions of BB into NewBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; StopAt != &*BI; ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getName()); + New->insertBefore(NewTerm); + ValueMapping[&*BI] = New; + + // Remap operands to patch up intra-block references. 
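The loop that follows is a standard clone-and-remap idiom; distilled into a standalone helper it looks like this (a sketch using the same ValueToValueMapTy conventions as the code above; the helper name is illustrative):

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Clone I in front of InsertBefore, record the mapping, then rewrite any
// operand that refers to a previously cloned instruction through the
// accumulated map so intra-block references point at the new copies.
static Instruction *cloneWithRemap(Instruction *I, Instruction *InsertBefore,
                                   ValueToValueMapTy &Map) {
  Instruction *New = I->clone();
  New->insertBefore(InsertBefore);
  Map[I] = New;
  for (Use &U : New->operands())
    if (Value *Mapped = Map.lookup(U.get()))
      U.set(Mapped);
  return New;
}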
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { + auto I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + } + + return NewBB; +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index 7ebeb61..e5392b5 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -12,14 +12,23 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-c/Core.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm-c/Core.h" using namespace llvm; +static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { + const Comdat *SC = Src->getComdat(); + if (!SC) + return; + Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName()); + DC->setSelectionKind(SC->getSelectionKind()); + Dst->setComdat(DC); +} + /// This is not as easy as it might seem because we have to worry about making /// copies of global variables and functions, and making their (initializers and /// references, respectively) refer to the right globals. @@ -87,7 +96,7 @@ std::unique_ptr<Module> llvm::CloneModule( else GV = new GlobalVariable( *New, I->getValueType(), false, GlobalValue::ExternalLinkage, - (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr, + nullptr, I->getName(), nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); VMap[&*I] = GV; // We do not copy attributes (mainly because copying between different @@ -123,7 +132,10 @@ std::unique_ptr<Module> llvm::CloneModule( SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; I->getAllMetadata(MDs); for (auto MD : MDs) - GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); + GV->addMetadata(MD.first, + *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); + + copyComdat(GV, &*I); } // Similarly, copy over function bodies now... @@ -153,6 +165,8 @@ std::unique_ptr<Module> llvm::CloneModule( if (I.hasPersonalityFn()) F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap)); + + copyComdat(F, &I); } // And aliases diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp index 60ae374..d9294c4 100644 --- a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -73,17 +73,17 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, default: return false; case ICmpInst::ICMP_SLT: - // X < 0 is equivalent to (X & SignBit) != 0. + // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: - // X > -1 is equivalent to (X & SignBit) == 0. - if (!C->isAllOnesValue()) + // X > -1 is equivalent to (X & SignMask) == 0. 
+ if (!C->isMinusOne()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c514c9c..1189714 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" @@ -58,6 +59,33 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { // Landing pads must be in the function where they were inserted for cleanup. if (BB.isEHPad()) return false; + // taking the address of a basic block moved to another function is illegal + if (BB.hasAddressTaken()) + return false; + + // don't hoist code that uses another basicblock address, as it's likely to + // lead to unexpected behavior, like cross-function jumps + SmallPtrSet<User const *, 16> Visited; + SmallVector<User const *, 16> ToVisit; + + for (Instruction const &Inst : BB) + ToVisit.push_back(&Inst); + + while (!ToVisit.empty()) { + User const *Curr = ToVisit.pop_back_val(); + if (!Visited.insert(Curr).second) + continue; + if (isa<BlockAddress const>(Curr)) + return false; // even a reference to self is likely to be not compatible + + if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB) + continue; + + for (auto const &U : Curr->operands()) { + if (auto *UU = dyn_cast<User>(U)) + ToVisit.push_back(UU); + } + } // Don't hoist code containing allocas, invokes, or vastarts. for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { @@ -73,24 +101,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { } /// \brief Build a set of blocks to extract if the input blocks are viable. -template <typename IteratorT> -static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, - IteratorT BBEnd) { +static SetVector<BasicBlock *> +buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) { + assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector<BasicBlock *> Result; - assert(BBBegin != BBEnd); - // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. - do { - if (!Result.insert(*BBBegin)) - llvm_unreachable("Repeated basic blocks in extraction input"); + for (BasicBlock *BB : BBs) { - if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) { + // If this block is dead, don't process it. + if (DT && !DT->isReachableFromEntry(BB)) + continue; + + if (!Result.insert(BB)) + llvm_unreachable("Repeated basic blocks in extraction input"); + if (!CodeExtractor::isBlockValidForExtraction(*BB)) { Result.clear(); return Result; } - } while (++BBBegin != BBEnd); + } #ifndef NDEBUG for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), @@ -106,49 +136,19 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, return Result; } -/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. 
-static SetVector<BasicBlock *> -buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) { - return buildExtractionBlockSet(BBs.begin(), BBs.end()); -} - -/// \brief Helper to call buildExtractionBlockSet with a RegionNode. -static SetVector<BasicBlock *> -buildExtractionBlockSet(const RegionNode &RN) { - if (!RN.isSubRegion()) - // Just a single BasicBlock. - return buildExtractionBlockSet(RN.getNodeAs<BasicBlock>()); - - const Region &R = *RN.getNodeAs<Region>(); - - return buildExtractionBlockSet(R.block_begin(), R.block_end()); -} - -CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} - CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), NumExitBlocks(~0U) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} - /// definedInRegion - Return true if the specified value is defined in the /// extracted region. static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { @@ -169,16 +169,255 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { return false; } -void CodeExtractor::findInputsOutputs(ValueSet &Inputs, - ValueSet &Outputs) const { +static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) { + BasicBlock *CommonExitBlock = nullptr; + auto hasNonCommonExitSucc = [&](BasicBlock *Block) { + for (auto *Succ : successors(Block)) { + // Internal edges, ok. + if (Blocks.count(Succ)) + continue; + if (!CommonExitBlock) { + CommonExitBlock = Succ; + continue; + } + if (CommonExitBlock == Succ) + continue; + + return true; + } + return false; + }; + + if (any_of(Blocks, hasNonCommonExitSucc)) + return nullptr; + + return CommonExitBlock; +} + +bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( + Instruction *Addr) const { + AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets()); + Function *Func = (*Blocks.begin())->getParent(); + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + + if (isa<DbgInfoIntrinsic>(II)) + continue; + + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast<StoreInst>(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast<LoadInst>(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variable can not be aliased with locals. 
+ if (dyn_cast<Constant>(MemAddr)) + break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!dyn_cast<AllocaInst>(Base) || Base == AI) + return false; + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + break; + return false; + } + // Treat all the other cases conservatively if it has side effects. + if (II.mayHaveSideEffects()) + return false; + } + } + } + } + + return true; +} + +BasicBlock * +CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { + BasicBlock *SinglePredFromOutlineRegion = nullptr; + assert(!Blocks.count(CommonExitBlock) && + "Expect a block outside the region!"); + for (auto *Pred : predecessors(CommonExitBlock)) { + if (!Blocks.count(Pred)) + continue; + if (!SinglePredFromOutlineRegion) { + SinglePredFromOutlineRegion = Pred; + } else if (SinglePredFromOutlineRegion != Pred) { + SinglePredFromOutlineRegion = nullptr; + break; + } + } + + if (SinglePredFromOutlineRegion) + return SinglePredFromOutlineRegion; + +#ifndef NDEBUG + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; + // If there are any phi nodes, the single pred either exists or has already + // be created before code extraction. + assert(!getFirstPHI(CommonExitBlock) && "Phi not expected"); +#endif + + BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( + CommonExitBlock->getFirstNonPHI()->getIterator()); + + for (auto *Pred : predecessors(CommonExitBlock)) { + if (Blocks.count(Pred)) + continue; + Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); + } + // Now add the old exit block to the outline region. + Blocks.insert(CommonExitBlock); + return CommonExitBlock; +} + +void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + BasicBlock *&ExitBlock) const { + Function *Func = (*Blocks.begin())->getParent(); + ExitBlock = getCommonExitBlock(Blocks); + + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + auto *AI = dyn_cast<AllocaInst>(&II); + if (!AI) + continue; + + // Find the pair of life time markers for address 'Addr' that are either + // defined inside the outline region or can legally be shrinkwrapped into + // the outline region. If there are not other untracked uses of the + // address, return the pair of markers if found; otherwise return a pair + // of nullptr. + auto GetLifeTimeMarkers = + [&](Instruction *Addr, bool &SinkLifeStart, + bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> { + Instruction *LifeStart = nullptr, *LifeEnd = nullptr; + + for (User *U : Addr->users()) { + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { + // Do not handle the case where AI has multiple start markers. + if (LifeStart) + return std::make_pair<Instruction *>(nullptr, nullptr); + LifeStart = IntrInst; + } + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { + if (LifeEnd) + return std::make_pair<Instruction *>(nullptr, nullptr); + LifeEnd = IntrInst; + } + continue; + } + // Find untracked uses of the address, bail. 
+ if (!definedInRegion(Blocks, U)) + return std::make_pair<Instruction *>(nullptr, nullptr); + } + + if (!LifeStart || !LifeEnd) + return std::make_pair<Instruction *>(nullptr, nullptr); + + SinkLifeStart = !definedInRegion(Blocks, LifeStart); + HoistLifeEnd = !definedInRegion(Blocks, LifeEnd); + // Do legality Check. + if ((SinkLifeStart || HoistLifeEnd) && + !isLegalToShrinkwrapLifetimeMarkers(Addr)) + return std::make_pair<Instruction *>(nullptr, nullptr); + + // Check to see if we have a place to do hoisting, if not, bail. + if (HoistLifeEnd && !ExitBlock) + return std::make_pair<Instruction *>(nullptr, nullptr); + + return std::make_pair(LifeStart, LifeEnd); + }; + + bool SinkLifeStart = false, HoistLifeEnd = false; + auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd); + + if (Markers.first) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); + SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); + continue; + } + + // Follow the bitcast. + Instruction *MarkerAddr = nullptr; + for (User *U : AI->users()) { + + if (U->stripInBoundsConstantOffsets() == AI) { + SinkLifeStart = false; + HoistLifeEnd = false; + Instruction *Bitcast = cast<Instruction>(U); + Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd); + if (Markers.first) { + MarkerAddr = Bitcast; + continue; + } + } + + // Found unknown use of AI. + if (!definedInRegion(Blocks, U)) { + MarkerAddr = nullptr; + break; + } + } + + if (MarkerAddr) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); + if (!definedInRegion(Blocks, MarkerAddr)) + SinkCands.insert(MarkerAddr); + SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); + } + } + } +} + +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, + const ValueSet &SinkCands) const { + for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; - ++OI) - if (definedInCaller(Blocks, *OI)) - Inputs.insert(*OI); + ++OI) { + Value *V = *OI; + if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + Inputs.insert(V); + } for (User *U : II.users()) if (!definedInRegion(Blocks, U)) { @@ -218,9 +457,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); - BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, - Header->getName()+".ce"); + BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. @@ -229,11 +466,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { Blocks.insert(NewBB); Header = NewBB; - // Okay, update dominator sets. The blocks that dominate the new one are the - // blocks that dominate TIBB plus the new block itself. - if (DT) - DT->splitBlock(NewBB); - // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. 
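A few lines up, severSplitPHINodes() now calls the BasicBlockUtils helper instead of pairing splitBasicBlock() with a manual DT->splitBlock(); the helper performs both steps. An equivalent standalone sketch, with an illustrative wrapper name:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Split Header after its PHI nodes and keep the DominatorTree consistent in
// one call; passing DT == nullptr simply skips the dominator update.
static BasicBlock *splitAfterPHIs(BasicBlock *Header, DominatorTree *DT) {
  return SplitBlock(Header, Header->getFirstNonPHI(), DT);
}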
if (NumPredsFromRegion) { @@ -248,12 +480,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); + PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they @@ -362,9 +596,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // "target-features" attribute allowing it to be lowered. // FIXME: This should be changed to check to see if a specific // attribute can not be inherited. - AttributeSet OldFnAttrs = oldFunction->getAttributes().getFnAttributes(); - AttrBuilder AB(OldFnAttrs, AttributeSet::FunctionIndex); - for (auto Attr : AB.td_attrs()) + AttrBuilder AB(oldFunction->getAttributes().getFnAttributes()); + for (const auto &Attr : AB.td_attrs()) newFunction->addFnAttr(Attr.first, Attr.second); newFunction->getBasicBlockList().push_back(newRootNode); @@ -440,8 +673,10 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; - - LLVMContext &Context = newFunction->getContext(); + + Module *M = newFunction->getParent(); + LLVMContext &Context = M->getContext(); + const DataLayout &DL = M->getDataLayout(); // Add inputs as params, or to be filled into the struct for (Value *input : inputs) @@ -456,8 +691,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(output); } else { AllocaInst *alloca = - new AllocaInst(output->getType(), nullptr, output->getName() + ".loc", - &codeReplacer->getParent()->front().front()); + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), + nullptr, output->getName() + ".loc", + &codeReplacer->getParent()->front().front()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } @@ -473,7 +709,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - Struct = new AllocaInst(StructArgTy, nullptr, "structArg", + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", &codeReplacer->getParent()->front().front()); params.push_back(Struct); @@ -748,7 +985,8 @@ Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; - ValueSet inputs, outputs; + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; // Assumption: this is a single-entry code region, and the header is the first // block in the region. @@ -787,8 +1025,23 @@ Function *CodeExtractor::extractCodeRegion() { "newFuncRoot"); newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + findAllocas(SinkingCands, HoistingCands, CommonExit); + assert(HoistingCands.empty() || CommonExit); + // Find inputs to, outputs from the code region. 
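Stepping back from the internals for a moment: callers reach all of this through a thin wrapper around extractCodeRegion(), and the new findAllocas()/sinking machinery changes what lands inside the outlined function, not how it is invoked. A usage sketch, with an illustrative wrapper name:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

using namespace llvm;

// Outline a single-entry region of blocks into a fresh function. With this
// patch, unreachable blocks are skipped when the block set is built and
// eligible allocas are sunk into the outlined body rather than passed in.
static Function *outlineRegion(ArrayRef<BasicBlock *> Region,
                               DominatorTree &DT) {
  CodeExtractor CE(Region, &DT);
  return CE.isEligible() ? CE.extractCodeRegion() : nullptr;
}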
- findInputsOutputs(inputs, outputs); + findInputsOutputs(inputs, outputs, SinkingCands); + + // Now sink all instructions which only have non-phi uses inside the region + for (auto *II : SinkingCands) + cast<Instruction>(II)->moveBefore(*newFuncRoot, + newFuncRoot->getFirstInsertionPt()); + + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast<Instruction>(II)->moveBefore(TI); + } // Calculate the exit blocks for the extracted region and the total exit // weights for each of those blocks. @@ -863,12 +1116,6 @@ Function *CodeExtractor::extractCodeRegion() { } } - //cerr << "NEW FUNCTION: " << *newFunction; - // verifyFunction(*newFunction); - - // cerr << "OLD FUNCTION: " << *oldFunction; - // verifyFunction(*oldFunction); - DEBUG(if (verifyFunction(*newFunction)) report_fatal_error("verifyFunction failed!")); return newFunction; diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 75a1dde..6d3d287 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -28,15 +28,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, return nullptr; } + Function *F = I.getParent()->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(I.getType(), nullptr, + Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr, I.getName()+".reg2mem", AllocaPoint); } else { - Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem", - &F->getEntryBlock().front()); + Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr, + I.getName() + ".reg2mem", &F->getEntryBlock().front()); } // We cannot demote invoke instructions to the stack if their normal edge @@ -110,14 +112,17 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { return nullptr; } + const DataLayout &DL = P->getModule()->getDataLayout(); + // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(P->getType(), nullptr, + Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr, P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem", + Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr, + P->getName() + ".reg2mem", &F->getEntryBlock().front()); } diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp index 8c23865..78d7474 100644 --- a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -67,8 +67,7 @@ IRBuilder<> *EscapeEnumerator::Next() { // Create a cleanup block. 
LLVMContext &C = F.getContext(); BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = - StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); + Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); if (!F.hasPersonalityFn()) { Constant *PersFn = getDefaultPersonalityFn(F.getParent()); F.setPersonalityFn(PersFn); diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp index 4adf175..1328f2f 100644 --- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -16,11 +16,12 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -401,7 +402,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = GV->getValueType(); - if (!Size->isAllOnesValue() && + if (!Size->isMinusOne() && Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); @@ -438,7 +439,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { InstResult = C; DEBUG(dbgs() << "Constant folded function call. Result: " << *InstResult << "\n"); @@ -486,7 +487,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, ConstantInt *Val = dyn_cast<ConstantInt>(getVal(SI->getCondition())); if (!Val) return false; // Cannot determine. 
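The next pair of lines tracks a SwitchInst API migration: findCaseValue() now returns a case iterator rather than a handle, so the successor is reached through operator->. A sketch of the new idiom:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Resolve the block a switch transfers control to for a known constant;
// findCaseValue() yields the default case's iterator when Val is unlisted.
static BasicBlock *switchTarget(SwitchInst *SI, ConstantInt *Val) {
  return SI->findCaseValue(Val)->getCaseSuccessor();
}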
- NextBB = SI->findCaseValue(Val).getCaseSuccessor(); + NextBB = SI->findCaseValue(Val)->getCaseSuccessor(); } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp index 7b96fbb..435eff3 100644 --- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +18,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "flattencfg" diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 81a7c4c..4a2be3a 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -15,8 +15,8 @@ #include "llvm/Transforms/Utils/FunctionComparator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -74,14 +74,16 @@ int FunctionComparator::cmpMem(StringRef L, StringRef R) const { return L.compare(R); } -int FunctionComparator::cmpAttrs(const AttributeSet L, - const AttributeSet R) const { - if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) +int FunctionComparator::cmpAttrs(const AttributeList L, + const AttributeList R) const { + if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets())) return Res; - for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { - AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), - RE = R.end(i); + for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) { + AttributeSet LAS = L.getAttributes(i); + AttributeSet RAS = R.getAttributes(i); + AttributeSet::iterator LI = LAS.begin(), LE = LAS.end(); + AttributeSet::iterator RI = RAS.begin(), RE = RAS.end(); for (; LI != LE && RI != RE; ++LI, ++RI) { Attribute LA = *LI; Attribute RA = *RI; @@ -511,8 +513,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) return Res; - if (int Res = - cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + if (int Res = cmpNumbers(LI->getSyncScopeID(), + cast<LoadInst>(R)->getSyncScopeID())) return Res; return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); @@ -527,7 +529,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) return Res; - return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + return cmpNumbers(SI->getSyncScopeID(), + cast<StoreInst>(R)->getSyncScopeID()); } if (const CmpInst *CI = dyn_cast<CmpInst>(L)) return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); @@ -582,7 +585,8 @@ int 
FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) return Res; - return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + return cmpNumbers(FI->getSyncScopeID(), + cast<FenceInst>(R)->getSyncScopeID()); } if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { if (int Res = cmpNumbers(CXI->isVolatile(), @@ -599,8 +603,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, cmpOrderings(CXI->getFailureOrdering(), cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) return Res; - return cmpNumbers(CXI->getSynchScope(), - cast<AtomicCmpXchgInst>(R)->getSynchScope()); + return cmpNumbers(CXI->getSyncScopeID(), + cast<AtomicCmpXchgInst>(R)->getSyncScopeID()); } if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { if (int Res = cmpNumbers(RMWI->getOperation(), @@ -612,8 +616,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(RMWI->getOrdering(), cast<AtomicRMWInst>(R)->getOrdering())) return Res; - return cmpNumbers(RMWI->getSynchScope(), - cast<AtomicRMWInst>(R)->getSynchScope()); + return cmpNumbers(RMWI->getSyncScopeID(), + cast<AtomicRMWInst>(R)->getSyncScopeID()); } if (const PHINode *PNL = dyn_cast<PHINode>(L)) { const PHINode *PNR = cast<PHINode>(R); diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 9844190..a98d072 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" using namespace llvm; @@ -21,11 +21,11 @@ using namespace llvm; /// Checks if we should import SGV as a definition, otherwise import as a /// declaration. bool FunctionImportGlobalProcessing::doImportAsDefinition( - const GlobalValue *SGV, DenseSet<const GlobalValue *> *GlobalsToImport) { + const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) { // For alias, we tie the definition to the base object. Extract it and recurse if (auto *GA = dyn_cast<GlobalAlias>(SGV)) { - if (GA->hasWeakAnyLinkage()) + if (GA->isInterposable()) return false; const GlobalObject *GO = GA->getBaseObject(); if (!GO->hasLinkOnceODRLinkage()) @@ -34,7 +34,7 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition( GO, GlobalsToImport); } // Only import the globals requested for importing. - if (GlobalsToImport->count(SGV)) + if (GlobalsToImport->count(const_cast<GlobalValue *>(SGV))) return true; // Otherwise no. 
return false; @@ -57,7 +57,8 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( return false; if (isPerformingImport()) { - assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) && + assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) || + !isNonRenamableLocal(*SGV)) && "Attempting to promote non-renamable local"); // We don't know for sure yet if we are importing this value (as either // a reference or a def), since we are simply walking all values in the @@ -254,9 +255,8 @@ bool FunctionImportGlobalProcessing::run() { return false; } -bool llvm::renameModuleForThinLTO( - Module &M, const ModuleSummaryIndex &Index, - DenseSet<const GlobalValue *> *GlobalsToImport) { +bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index, + SetVector<GlobalValue *> *GlobalsToImport) { FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport); return ThinLTOProcessing.run(); } diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp index 74ebcda..245fefb 100644 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -7,12 +7,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> using namespace llvm; @@ -175,13 +188,9 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, return false; } +GlobalStatus::GlobalStatus() = default; + bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { SmallPtrSet<const PHINode *, 16> PhiUsers; return analyzeGlobalAux(V, GS, PhiUsers); } - -GlobalStatus::GlobalStatus() - : IsCompared(false), IsLoaded(false), StoredType(NotStored), - StoredOnceValue(nullptr), AccessingFunction(nullptr), - HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), - Ordering(AtomicOrdering::NotAtomic) {} diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp index ed018bb..b8c12ad 100644 --- a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -62,6 +62,8 @@ void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller, void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { ModuleName = M.getName(); for (const auto &F : M.functions()) { + if (F.isDeclaration()) + continue; AllFunctions++; ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr); } diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index a40079c..2a18c14 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp 
@@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -20,19 +19,21 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -40,8 +41,9 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; @@ -1107,26 +1109,23 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { bool DTCalculated = false; Function *CalledFunc = CS.getCalledFunction(); - for (Function::arg_iterator I = CalledFunc->arg_begin(), - E = CalledFunc->arg_end(); - I != E; ++I) { - unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; - if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { + for (Argument &Arg : CalledFunc->args()) { + unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0; + if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) { if (!DTCalculated) { - DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent() - ->getParent())); + DT.recalculate(*CS.getCaller()); DTCalculated = true; } // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. 
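Condensed, the logic this hunk rewrites is a check-then-assume step: only when the existing IR cannot already prove the parameter's alignment at the call site does the inliner materialize an llvm.assume. A sketch built from the same helpers, with an illustrative function name:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Emit an alignment assumption for ArgVal at the call site unless it is
// already provable there; new assumptions must be registered with the cache.
static void maybeAssumeAligned(Instruction *CallSite, Value *ArgVal,
                               unsigned Align, const DataLayout &DL,
                               AssumptionCache *AC, DominatorTree *DT) {
  if (getKnownAlignment(ArgVal, DL, CallSite, AC, DT) >= Align)
    return; // already provable, no assumption needed
  CallInst *NewAsmp =
      IRBuilder<>(CallSite).CreateAlignmentAssumption(DL, ArgVal, Align);
  AC->registerAssumption(NewAsmp);
}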
- Value *Arg = CS.getArgument(I->getArgNo()); - if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align) + Value *ArgVal = CS.getArgument(Arg.getArgNo()); + if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align) continue; - CallInst *NewAssumption = IRBuilder<>(CS.getInstruction()) - .CreateAlignmentAssumption(DL, Arg, Align); - AC->registerAssumption(NewAssumption); + CallInst *NewAsmp = IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, ArgVal, Align); + AC->registerAssumption(NewAsmp); } } } @@ -1140,7 +1139,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, ValueToValueMapTy &VMap, InlineFunctionInfo &IFI) { CallGraph &CG = *IFI.CG; - const Function *Caller = CS.getInstruction()->getParent()->getParent(); + const Function *Caller = CS.getCaller(); const Function *Callee = CS.getCalledFunction(); CallGraphNode *CalleeNode = CG[Callee]; CallGraphNode *CallerNode = CG[Caller]; @@ -1225,7 +1224,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, PointerType *ArgTy = cast<PointerType>(Arg->getType()); Type *AggTy = ArgTy->getElementType(); - Function *Caller = TheCall->getParent()->getParent(); + Function *Caller = TheCall->getFunction(); + const DataLayout &DL = Caller->getParent()->getDataLayout(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and @@ -1239,31 +1239,30 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; - const DataLayout &DL = Caller->getParent()->getDataLayout(); // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >= ByValAlignment) return Arg; - + // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = - Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + unsigned Align = DL.getPrefTypeAlignment(AggTy); // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). Align = std::max(Align, ByValAlignment); - - Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), + + Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(), + nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); - + // Uses of the argument in the function should use our new alloca // instead. return NewAlloca; @@ -1303,41 +1302,6 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } -/// Rebuild the entire inlined-at chain for this instruction so that the top of -/// the chain now is inlined-at the new call site. 
-static DebugLoc -updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode, - LLVMContext &Ctx, - DenseMap<const DILocation *, DILocation *> &IANodes) { - SmallVector<DILocation *, 3> InlinedAtLocations; - DILocation *Last = InlinedAtNode; - DILocation *CurInlinedAt = DL; - - // Gather all the inlined-at nodes - while (DILocation *IA = CurInlinedAt->getInlinedAt()) { - // Skip any we've already built nodes for - if (DILocation *Found = IANodes[IA]) { - Last = Found; - break; - } - - InlinedAtLocations.push_back(IA); - CurInlinedAt = IA; - } - - // Starting from the top, rebuild the nodes to point to the new inlined-at - // location (then rebuilding the rest of the chain behind it) and update the - // map of already-constructed inlined-at nodes. - for (const DILocation *MD : reverse(InlinedAtLocations)) { - Last = IANodes[MD] = DILocation::getDistinct( - Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); - } - - // And finally create the normal location for this instruction, referring to - // the new inlined-at chain. - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); -} - /// Return the result of AI->isStaticAlloca() if AI were moved to the entry /// block. Allocas used in inalloca calls and allocas of dynamic array size /// cannot be static. @@ -1365,14 +1329,16 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, // Cache the inlined-at nodes as they're built so they are reused, without // this every instruction's inlined-at chain would become distinct from each // other. - DenseMap<const DILocation *, DILocation *> IANodes; + DenseMap<const MDNode *, MDNode *> IANodes; for (; FI != Fn->end(); ++FI) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (DebugLoc DL = BI->getDebugLoc()) { - BI->setDebugLoc( - updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); + auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(), + IANodes); + auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA); + BI->setDebugLoc(IDL); continue; } @@ -1393,6 +1359,91 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } } +/// Update the block frequencies of the caller after a callee has been inlined. +/// +/// Each block cloned into the caller has its block frequency scaled by the +/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of +/// callee's entry block gets the same frequency as the callsite block and the +/// relative frequencies of all cloned blocks remain the same after cloning. +static void updateCallerBFI(BasicBlock *CallSiteBlock, + const ValueToValueMapTy &VMap, + BlockFrequencyInfo *CallerBFI, + BlockFrequencyInfo *CalleeBFI, + const BasicBlock &CalleeEntryBlock) { + SmallPtrSet<BasicBlock *, 16> ClonedBBs; + for (auto const &Entry : VMap) { + if (!isa<BasicBlock>(Entry.first) || !Entry.second) + continue; + auto *OrigBB = cast<BasicBlock>(Entry.first); + auto *ClonedBB = cast<BasicBlock>(Entry.second); + uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency(); + if (!ClonedBBs.insert(ClonedBB).second) { + // Multiple blocks in the callee might get mapped to one cloned block in + // the caller since we prune the callee as we clone it. When that happens, + // we want to use the maximum among the original blocks' frequencies. 
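The scaling rule described in updateCallerBFI's comment reduces to one line of integer arithmetic: NewFreq = OrigFreq * CallSiteFreq / CalleeEntryFreq. A hedged standalone sketch, using plain integers instead of the BlockFrequencyInfo API and assuming a compiler with __int128 (GCC or Clang):

#include <cstdint>

// Scales a cloned block's frequency so the cloned entry block ends up with
// exactly the call site's frequency and all other cloned blocks keep their
// relative weights.
uint64_t scaledCloneFreq(uint64_t OrigFreq, uint64_t CallSiteFreq,
                         uint64_t CalleeEntryFreq) {
  if (CalleeEntryFreq == 0)
    return 0; // no meaningful callee entry frequency to scale against
  return static_cast<uint64_t>(
      static_cast<unsigned __int128>(OrigFreq) * CallSiteFreq /
      CalleeEntryFreq);
}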
+ uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency(); + if (NewFreq > Freq) + Freq = NewFreq; + } + CallerBFI->setBlockFreq(ClonedBB, Freq); + } + BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock)); + CallerBFI->setBlockFreqAndScale( + EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(), + ClonedBBs); +} + +/// Update the branch metadata for cloned call instructions. +static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, + const Optional<uint64_t> &CalleeEntryCount, + const Instruction *TheCall, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *CallerBFI) { + if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) + return; + Optional<uint64_t> CallSiteCount = + PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; + uint64_t CallCount = + std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, + CalleeEntryCount.getValue()); + + for (auto const &Entry : VMap) + if (isa<CallInst>(Entry.first)) + if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) + CI->updateProfWeight(CallCount, CalleeEntryCount.getValue()); + for (BasicBlock &BB : *Callee) + // No need to update the callsite if it is pruned during inlining. + if (VMap.count(&BB)) + for (Instruction &I : BB) + if (CallInst *CI = dyn_cast<CallInst>(&I)) + CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount, + CalleeEntryCount.getValue()); +} + +/// Update the entry count of callee after inlining. +/// +/// The callsite's block count is subtracted from the callee's function entry +/// count. +static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, + Instruction *CallInst, Function *Callee, + ProfileSummaryInfo *PSI) { + // If the callee has an original count of N, and the estimated count of the + // callsite is M, the new callee count is set to N - M. M is estimated from + // the caller's entry count, its entry block frequency and the block frequency + // of the callsite. + Optional<uint64_t> CalleeCount = Callee->getEntryCount(); + if (!CalleeCount.hasValue() || !PSI) + return; + Optional<uint64_t> CallCount = PSI->getProfileCount(CallInst, CallerBFI); + if (!CallCount.hasValue()) + return; + // Since CallSiteCount is an estimate, it could exceed the original callee + // count; in that case the new count is set to 0. + if (CallCount.getValue() > CalleeCount.getValue()) + Callee->setEntryCount(0); + else + Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue()); +} /// This function inlines the called function into the basic block of the /// caller. This returns false if it is not possible to inline this call. @@ -1405,13 +1456,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); - assert(TheCall->getParent() && TheCall->getParent()->getParent() && - "Instruction not in function!"); + assert(TheCall->getParent() && TheCall->getFunction() + && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); - - const Function *CalledFunc = CS.getCalledFunction(); + + Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false; @@ -1548,7 +1599,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; - for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + for (Function::arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; @@ -1558,7 +1609,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo+1)); + CalledFunc->getParamAlignment(ArgNo)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } @@ -1578,10 +1629,19 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, TheCall); - // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; + if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) + // Update the BFI of blocks cloned into the caller. + updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, + CalledFunc->front()); + + updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, + IFI.PSI, IFI.CallerBFI); + // Update the profile count of callee. + updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI); + // Inject byval arguments initialization. for (std::pair<Value*, Value*> &Init : ByValInit) HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), @@ -2087,6 +2147,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CalledFunc->getName() + ".exit"); } + if (IFI.CallerBFI) { + // Copy original BB's block frequency to AfterCallBB + IFI.CallerBFI->setBlockFreq( + AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); + } + // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // @@ -2206,7 +2272,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AssumptionCache *AC = IFI.GetAssumptionCache ? 
&(*IFI.GetAssumptionCache)(*Caller) : nullptr; auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { + if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp index 8a1973d..23ec45e 100644 --- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; namespace { @@ -26,16 +26,15 @@ namespace { InstNamer() : FunctionPass(ID) { initializeInstNamerPass(*PassRegistry::getPassRegistry()); } - + void getAnalysisUsage(AnalysisUsage &Info) const override { Info.setPreservesAll(); } bool runOnFunction(Function &F) override { - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); - AI != AE; ++AI) - if (!AI->hasName() && !AI->getType()->isVoidTy()) - AI->setName("arg"); + for (auto &Arg : F.args()) + if (!Arg.hasName()) + Arg.setName("arg"); for (BasicBlock &BB : F) { if (!BB.hasName()) @@ -48,11 +47,11 @@ namespace { return true; } }; - + char InstNamer::ID = 0; } -INITIALIZE_PASS(InstNamer, "instnamer", +INITIALIZE_PASS(InstNamer, "instnamer", "Assign names to anonymous instructions", false, false) char &llvm::InstructionNamerID = InstNamer::ID; //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 68c6b74..089f2b5 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -85,9 +85,11 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, UsesToRewrite.clear(); Instruction *I = Worklist.pop_back_val(); + assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist"); BasicBlock *InstBB = I->getParent(); Loop *L = LI.getLoopFor(InstBB); - if (!LoopExitBlocks.count(L)) + assert(L && "Instruction belongs to a BB that's not part of a loop"); + if (!LoopExitBlocks.count(L)) L->getExitBlocks(LoopExitBlocks[L]); assert(LoopExitBlocks.count(L)); const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L]; @@ -95,17 +97,10 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, if (ExitBlocks.empty()) continue; - // Tokens cannot be used in PHI nodes, so we skip over them. - // We can run into tokens which are live out of a loop with catchswitch - // instructions in Windows EH if the catchswitch has one catchpad which - // is inside the loop and another which is not. - if (I->getType()->isTokenTy()) - continue; - for (Use &U : I->uses()) { Instruction *User = cast<Instruction>(U.getUser()); BasicBlock *UserBB = User->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(User)) + if (auto *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(U); if (InstBB != UserBB && !L->contains(UserBB)) @@ -123,7 +118,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // DomBB dominates the value, so adjust DomBB to the normal destination // block, which is effectively where the value is first usable. 
BasicBlock *DomBB = InstBB; - if (InvokeInst *Inv = dyn_cast<InvokeInst>(I)) + if (auto *Inv = dyn_cast<InvokeInst>(I)) DomBB = Inv->getNormalDest(); DomTreeNode *DomNode = DT.getNode(DomBB); @@ -188,7 +183,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // block. Instruction *User = cast<Instruction>(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(User)) + if (auto *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(*UseToRewrite); if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { @@ -213,13 +208,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // Post process PHI instructions that were inserted into another disjoint // loop and update their exits properly. - for (auto *PostProcessPN : PostProcessPHIs) { - if (PostProcessPN->use_empty()) - continue; - - // Reprocess each PHI instruction. - Worklist.push_back(PostProcessPN); - } + for (auto *PostProcessPN : PostProcessPHIs) + if (!PostProcessPN->use_empty()) + Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have // any uses rewritten. @@ -237,40 +228,75 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, return Changed; } -/// Return true if the specified block dominates at least -/// one of the blocks in the specified list. -static bool -blockDominatesAnExit(BasicBlock *BB, - DominatorTree &DT, - const SmallVectorImpl<BasicBlock *> &ExitBlocks) { - DomTreeNode *DomNode = DT.getNode(BB); - return any_of(ExitBlocks, [&](BasicBlock *EB) { - return DT.dominates(DomNode, DT.getNode(EB)); - }); +// Compute the set of BasicBlocks in the loop `L` dominating at least one exit. +static void computeBlocksDominatingExits( + Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks, + SmallSetVector<BasicBlock *, 8> &BlocksDominatingExits) { + SmallVector<BasicBlock *, 8> BBWorklist; + + // We start from the exit blocks, as every block trivially dominates itself + // (not strictly). + for (BasicBlock *BB : ExitBlocks) + BBWorklist.push_back(BB); + + while (!BBWorklist.empty()) { + BasicBlock *BB = BBWorklist.pop_back_val(); + + // Check if this is a loop header. If this is the case, we're done. + if (L.getHeader() == BB) + continue; + + // Otherwise, add its immediate predecessor in the dominator tree to the + // worklist, unless we visited it already. + BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock(); + + // Exit blocks can have an immediate dominator not belonging to the + // loop. For an exit block to be immediately dominated by another block + // outside the loop, it implies that not all paths from that dominator to the + // exit block go through the loop. + // Example: + // + // |---- A + // | | + // | B<-- + // | | | + // |---> C -- + // | + // D + // + // C is the exit block of the loop and it's immediately dominated by A, + // which doesn't belong to the loop. + if (!L.contains(IDomBB)) + continue; + + if (BlocksDominatingExits.insert(IDomBB)) + BBWorklist.push_back(IDomBB); + } } bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; - // Get the set of exiting blocks. SmallVector<BasicBlock *, 8> ExitBlocks; L.getExitBlocks(ExitBlocks); - if (ExitBlocks.empty()) return false; + SmallSetVector<BasicBlock *, 8> BlocksDominatingExits; + + // We want to avoid use-scanning leveraging dominance information.
+ // If a block doesn't dominate any of the loop exits, then none of the values + // defined in the loop can be used outside. + // We compute the set of blocks fulfilling the conditions in advance, + // walking the dominator tree upwards until we hit a loop header. + computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits); + SmallVector<Instruction *, 8> Worklist; // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, put them into the worklist to rewrite those uses. - for (BasicBlock *BB : L.blocks()) { - // For large loops, avoid use-scanning by using dominance information: In - // particular, if a block does not dominate any of the loop exits, then none - // of the values defined in the block could be used outside the loop. - if (!blockDominatesAnExit(BB, DT, ExitBlocks)) - continue; - + for (BasicBlock *BB : BlocksDominatingExits) { for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. @@ -279,6 +305,13 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, !isa<PHINode>(I.user_back()))) continue; + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (I.getType()->isTokenTy()) + continue; + Worklist.push_back(&I); } } @@ -395,8 +428,8 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) { if (!formLCSSAOnAllLoops(&LI, DT, SE)) return PreservedAnalyses::all(); - // FIXME: This should also 'preserve the CFG'. PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); PA.preserve<BasicAA>(); PA.preserve<GlobalsAA>(); PA.preserve<SCEVAA>(); diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index d97cd75..42aca75 100644 --- a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -48,16 +49,6 @@ using namespace llvm; STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); -static cl::opt<bool> LibCallsShrinkWrapDoDomainError( - "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with domain errors")); -static cl::opt<bool> LibCallsShrinkWrapDoRangeError( - "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with range errors")); -static cl::opt<bool> LibCallsShrinkWrapDoPoleError( - "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with pole errors")); - namespace { class LibCallsShrinkWrapLegacyPass : public FunctionPass { public: @@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", namespace { class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> { public: - LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){}; - bool isChanged() const { return
Changed; } + LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) + : TLI(TLI), DT(DT){}; void visitCallInst(CallInst &CI) { checkCandidate(CI); } - void perform() { + bool perform() { + bool Changed = false; for (auto &CI : WorkList) { DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() << "\n"); @@ -94,18 +86,19 @@ public: DEBUG(dbgs() << "Transformed\n"); } } + return Changed; } private: bool perform(CallInst *CI); void checkCandidate(CallInst &CI); void shrinkWrapCI(CallInst *CI, Value *Cond); - bool performCallDomainErrorOnly(CallInst *CI, const LibFunc::Func &Func); - bool performCallErrors(CallInst *CI, const LibFunc::Func &Func); - bool performCallRangeErrorOnly(CallInst *CI, const LibFunc::Func &Func); - Value *generateOneRangeCond(CallInst *CI, const LibFunc::Func &Func); - Value *generateTwoRangeCond(CallInst *CI, const LibFunc::Func &Func); - Value *generateCondForPow(CallInst *CI, const LibFunc::Func &Func); + bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func); + bool performCallErrors(CallInst *CI, const LibFunc &Func); + bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func); + Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func); + Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func); + Value *generateCondForPow(CallInst *CI, const LibFunc &Func); // Create an OR of two conditions. Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val, @@ -134,51 +127,51 @@ private: } const TargetLibraryInfo &TLI; + DominatorTree *DT; SmallVector<CallInst *, 16> WorkList; - bool Changed; }; } // end anonymous namespace // Perform the transformation to calls with errno set by domain error. bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI, - const LibFunc::Func &Func) { + const LibFunc &Func) { Value *Cond = nullptr; switch (Func) { - case LibFunc::acos: // DomainError: (x < -1 || x > 1) - case LibFunc::acosf: // Same as acos - case LibFunc::acosl: // Same as acos - case LibFunc::asin: // DomainError: (x < -1 || x > 1) - case LibFunc::asinf: // Same as asin - case LibFunc::asinl: // Same as asin + case LibFunc_acos: // DomainError: (x < -1 || x > 1) + case LibFunc_acosf: // Same as acos + case LibFunc_acosl: // Same as acos + case LibFunc_asin: // DomainError: (x < -1 || x > 1) + case LibFunc_asinf: // Same as asin + case LibFunc_asinl: // Same as asin { ++NumWrappedTwoCond; Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f); break; } - case LibFunc::cos: // DomainError: (x == +inf || x == -inf) - case LibFunc::cosf: // Same as cos - case LibFunc::cosl: // Same as cos - case LibFunc::sin: // DomainError: (x == +inf || x == -inf) - case LibFunc::sinf: // Same as sin - case LibFunc::sinl: // Same as sin + case LibFunc_cos: // DomainError: (x == +inf || x == -inf) + case LibFunc_cosf: // Same as cos + case LibFunc_cosl: // Same as cos + case LibFunc_sin: // DomainError: (x == +inf || x == -inf) + case LibFunc_sinf: // Same as sin + case LibFunc_sinl: // Same as sin { ++NumWrappedTwoCond; Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ, -INFINITY); break; } - case LibFunc::acosh: // DomainError: (x < 1) - case LibFunc::acoshf: // Same as acosh - case LibFunc::acoshl: // Same as acosh + case LibFunc_acosh: // DomainError: (x < 1) + case LibFunc_acoshf: // Same as acosh + case LibFunc_acoshl: // Same as acosh { ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f); break; } - case LibFunc::sqrt: // DomainError: (x < 0) - case 
LibFunc::sqrtf: // Same as sqrt - case LibFunc::sqrtl: // Same as sqrt + case LibFunc_sqrt: // DomainError: (x < 0) + case LibFunc_sqrtf: // Same as sqrt + case LibFunc_sqrtl: // Same as sqrt { ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f); @@ -193,31 +186,31 @@ bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI, // Perform the transformation to calls with errno set by range error. bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI, - const LibFunc::Func &Func) { + const LibFunc &Func) { Value *Cond = nullptr; switch (Func) { - case LibFunc::cosh: - case LibFunc::coshf: - case LibFunc::coshl: - case LibFunc::exp: - case LibFunc::expf: - case LibFunc::expl: - case LibFunc::exp10: - case LibFunc::exp10f: - case LibFunc::exp10l: - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: - case LibFunc::sinh: - case LibFunc::sinhf: - case LibFunc::sinhl: { + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_expl: + case LibFunc_exp10: + case LibFunc_exp10f: + case LibFunc_exp10l: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: { Cond = generateTwoRangeCond(CI, Func); break; } - case LibFunc::expm1: // RangeError: (709, inf) - case LibFunc::expm1f: // RangeError: (88, inf) - case LibFunc::expm1l: // RangeError: (11356, inf) + case LibFunc_expm1: // RangeError: (709, inf) + case LibFunc_expm1f: // RangeError: (88, inf) + case LibFunc_expm1l: // RangeError: (11356, inf) { Cond = generateOneRangeCond(CI, Func); break; @@ -231,63 +224,54 @@ bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI, // Perform the transformation to calls with errno set by combination of errors. 
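In source terms the transformation driven by these tables is small. The pass only fires when the call's result is unused (checkCandidate() below rejects anything else), so the entire call can be guarded by the no-error range test. A sketch for expf using the (-103, 88) bounds listed above; the function name is illustrative:

#include <cmath>

// Before: expf(X) is executed unconditionally, purely for its errno effect.
// After:  the call runs only when the argument can actually set errno.
void expf_errno_only(float X) {
  if (X < -103.0f || X > 88.0f)
    (void)expf(X);
}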
bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, - const LibFunc::Func &Func) { + const LibFunc &Func) { Value *Cond = nullptr; switch (Func) { - case LibFunc::atanh: // DomainError: (x < -1 || x > 1) + case LibFunc_atanh: // DomainError: (x < -1 || x > 1) // PoleError: (x == -1 || x == 1) // Overall Cond: (x <= -1 || x >= 1) - case LibFunc::atanhf: // Same as atanh - case LibFunc::atanhl: // Same as atanh + case LibFunc_atanhf: // Same as atanh + case LibFunc_atanhl: // Same as atanh { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedTwoCond; Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); break; } - case LibFunc::log: // DomainError: (x < 0) + case LibFunc_log: // DomainError: (x < 0) // PoleError: (x == 0) // Overall Cond: (x <= 0) - case LibFunc::logf: // Same as log - case LibFunc::logl: // Same as log - case LibFunc::log10: // Same as log - case LibFunc::log10f: // Same as log - case LibFunc::log10l: // Same as log - case LibFunc::log2: // Same as log - case LibFunc::log2f: // Same as log - case LibFunc::log2l: // Same as log - case LibFunc::logb: // Same as log - case LibFunc::logbf: // Same as log - case LibFunc::logbl: // Same as log + case LibFunc_logf: // Same as log + case LibFunc_logl: // Same as log + case LibFunc_log10: // Same as log + case LibFunc_log10f: // Same as log + case LibFunc_log10l: // Same as log + case LibFunc_log2: // Same as log + case LibFunc_log2f: // Same as log + case LibFunc_log2l: // Same as log + case LibFunc_logb: // Same as log + case LibFunc_logbf: // Same as log + case LibFunc_logbl: // Same as log { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); break; } - case LibFunc::log1p: // DomainError: (x < -1) + case LibFunc_log1p: // DomainError: (x < -1) // PoleError: (x == -1) // Overall Cond: (x <= -1) - case LibFunc::log1pf: // Same as log1p - case LibFunc::log1pl: // Same as log1p + case LibFunc_log1pf: // Same as log1p + case LibFunc_log1pl: // Same as log1p { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); break; } - case LibFunc::pow: // DomainError: x < 0 and y is noninteger + case LibFunc_pow: // DomainError: x < 0 and y is noninteger // PoleError: x == 0 and y < 0 // RangeError: overflow or underflow - case LibFunc::powf: - case LibFunc::powl: { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError || - !LibCallsShrinkWrapDoRangeError) - return false; + case LibFunc_powf: + case LibFunc_powl: { Cond = generateCondForPow(CI, Func); if (Cond == nullptr) return false; @@ -313,7 +297,7 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) { if (!CI.use_empty()) return; - LibFunc::Func Func; + LibFunc Func; Function *Callee = CI.getCalledFunction(); if (!Callee) return; @@ -333,20 +317,20 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) { // Generate the upper bound condition for RangeError. 
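For the combined-error cases handled above, the domain and pole conditions fold into a single comparison. The log family is representative; a sketch in the same spirit as the previous one:

#include <cmath>

// Domain error (x < 0) and pole error (x == 0) merge into x <= 0.
void logf_errno_only(float X) {
  if (X <= 0.0f)
    (void)logf(X);
}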
Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, - const LibFunc::Func &Func) { + const LibFunc &Func) { float UpperBound; switch (Func) { - case LibFunc::expm1: // RangeError: (709, inf) + case LibFunc_expm1: // RangeError: (709, inf) UpperBound = 709.0f; break; - case LibFunc::expm1f: // RangeError: (88, inf) + case LibFunc_expm1f: // RangeError: (88, inf) UpperBound = 88.0f; break; - case LibFunc::expm1l: // RangeError: (11356, inf) + case LibFunc_expm1l: // RangeError: (11356, inf) UpperBound = 11356.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedOneCond; @@ -355,62 +339,62 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, // Generate the lower and upper bound condition for RangeError. Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, - const LibFunc::Func &Func) { + const LibFunc &Func) { float UpperBound, LowerBound; switch (Func) { - case LibFunc::cosh: // RangeError: (x < -710 || x > 710) - case LibFunc::sinh: // Same as cosh + case LibFunc_cosh: // RangeError: (x < -710 || x > 710) + case LibFunc_sinh: // Same as cosh LowerBound = -710.0f; UpperBound = 710.0f; break; - case LibFunc::coshf: // RangeError: (x < -89 || x > 89) - case LibFunc::sinhf: // Same as coshf + case LibFunc_coshf: // RangeError: (x < -89 || x > 89) + case LibFunc_sinhf: // Same as coshf LowerBound = -89.0f; UpperBound = 89.0f; break; - case LibFunc::coshl: // RangeError: (x < -11357 || x > 11357) - case LibFunc::sinhl: // Same as coshl + case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357) + case LibFunc_sinhl: // Same as coshl LowerBound = -11357.0f; UpperBound = 11357.0f; break; - case LibFunc::exp: // RangeError: (x < -745 || x > 709) + case LibFunc_exp: // RangeError: (x < -745 || x > 709) LowerBound = -745.0f; UpperBound = 709.0f; break; - case LibFunc::expf: // RangeError: (x < -103 || x > 88) + case LibFunc_expf: // RangeError: (x < -103 || x > 88) LowerBound = -103.0f; UpperBound = 88.0f; break; - case LibFunc::expl: // RangeError: (x < -11399 || x > 11356) + case LibFunc_expl: // RangeError: (x < -11399 || x > 11356) LowerBound = -11399.0f; UpperBound = 11356.0f; break; - case LibFunc::exp10: // RangeError: (x < -323 || x > 308) + case LibFunc_exp10: // RangeError: (x < -323 || x > 308) LowerBound = -323.0f; UpperBound = 308.0f; break; - case LibFunc::exp10f: // RangeError: (x < -45 || x > 38) + case LibFunc_exp10f: // RangeError: (x < -45 || x > 38) LowerBound = -45.0f; UpperBound = 38.0f; break; - case LibFunc::exp10l: // RangeError: (x < -4950 || x > 4932) + case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932) LowerBound = -4950.0f; UpperBound = 4932.0f; break; - case LibFunc::exp2: // RangeError: (x < -1074 || x > 1023) + case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023) LowerBound = -1074.0f; UpperBound = 1023.0f; break; - case LibFunc::exp2f: // RangeError: (x < -149 || x > 127) + case LibFunc_exp2f: // RangeError: (x < -149 || x > 127) LowerBound = -149.0f; UpperBound = 127.0f; break; - case LibFunc::exp2l: // RangeError: (x < -16445 || x > 11383) + case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383) LowerBound = -16445.0f; UpperBound = 11383.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedTwoCond; @@ -434,9 +418,9 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, // (i.e. we might invoke the calls that will not set the errno.). 
// Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, - const LibFunc::Func &Func) { - // FIXME: LibFunc::powf and powl TBD. - if (Func != LibFunc::pow) { + const LibFunc &Func) { + // FIXME: LibFunc_powf and powl TBD. + if (Func != LibFunc_pow) { DEBUG(dbgs() << "Not handled powf() and powl()\n"); return nullptr; } @@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, // Wrap conditions that can potentially generate errno to the library call. void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { - assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst"); + assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); MDNode *BranchWeights = MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + TerminatorInst *NewInst = - SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights); + SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT); BasicBlock *CallBB = NewInst->getParent(); CallBB->setName("cdce.call"); - CallBB->getSingleSuccessor()->setName("cdce.end"); + BasicBlock *SuccBB = CallBB->getSingleSuccessor(); + assert(SuccBB && "The split block should have a single successor"); + SuccBB->setName("cdce.end"); CI->removeFromParent(); CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); DEBUG(dbgs() << "== Basic Block After =="); @@ -516,38 +503,44 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { // Perform the transformation to a single candidate. bool LibCallsShrinkWrap::perform(CallInst *CI) { - LibFunc::Func Func; + LibFunc Func; Function *Callee = CI->getCalledFunction(); assert(Callee && "perform() should apply to a non-empty callee"); TLI.getLibFunc(*Callee, Func); assert(Func && "perform() is not expecting an empty function"); - if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func)) - return true; - - if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func)) + if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func)) return true; - return performCallErrors(CI, Func); } void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } -static bool runImpl(Function &F, const TargetLibraryInfo &TLI) { +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, + DominatorTree *DT) { if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - LibCallsShrinkWrap CCDCE(TLI); + LibCallsShrinkWrap CCDCE(TLI, DT); CCDCE.visit(F); - CCDCE.perform(); - return CCDCE.isChanged(); + bool Changed = CCDCE.perform(); + +// Verify the dominator after we've updated it locally. +#ifndef NDEBUG + if (DT) + DT->verifyDomTree(); +#endif + return Changed; } bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - return runImpl(F, TLI); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + return runImpl(F, TLI, DT); } namespace llvm { @@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() { PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, FunctionAnalysisManager &FAM) { auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - bool Changed = runImpl(F, TLI); - if (!Changed) + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + if (!runImpl(F, TLI, DT)) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); return PA; } } diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 6e4174a..7461061 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -22,10 +22,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -45,6 +46,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -126,21 +128,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // If the default is unreachable, ignore it when searching for TheOnlyDest. if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) && SI->getNumCases() > 0) { - TheOnlyDest = SI->case_begin().getCaseSuccessor(); + TheOnlyDest = SI->case_begin()->getCaseSuccessor(); } // Figure out which case it goes to. - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { + for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { // Found case matching a constant operand? - if (i.getCaseValue() == CI) { - TheOnlyDest = i.getCaseSuccessor(); + if (i->getCaseValue() == CI) { + TheOnlyDest = i->getCaseSuccessor(); break; } // Check to see if this branch is going to the same place as the default // dest. If so, eliminate it as an explicit compare. - if (i.getCaseSuccessor() == DefaultDest) { + if (i->getCaseSuccessor() == DefaultDest) { MDNode *MD = SI->getMetadata(LLVMContext::MD_prof); unsigned NCases = SI->getNumCases(); // Fold the case metadata into the default if there will be any branches @@ -154,7 +155,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, Weights.push_back(CI->getValue().getZExtValue()); } // Merge weight of this case to the default weight. - unsigned idx = i.getCaseIndex(); + unsigned idx = i->getCaseIndex(); Weights[0] += Weights[idx+1]; // Remove weight for this case. std::swap(Weights[idx+1], Weights.back()); @@ -165,15 +166,19 @@ } // Remove this entry. DefaultDest->removePredecessor(SI->getParent()); - SI->removeCase(i); - --i; --e; + i = SI->removeCase(i); + e = SI->case_end(); continue; } // Otherwise, check to see if the switch only branches to one destination. // We do this by resetting "TheOnlyDest" to null when we find two non-equal // destinations.
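The new iterator discipline in the switch-folding hunk above (take the iterator returned by the removal, refresh the end iterator, and only increment when nothing was erased) is the standard erase-while-iterating idiom. A standalone analog over a std::vector:

#include <vector>

void eraseMatching(std::vector<int> &Cases, int Doomed) {
  for (auto I = Cases.begin(), E = Cases.end(); I != E;) {
    if (*I == Doomed) {
      I = Cases.erase(I); // erase() invalidates; reuse the returned iterator
      E = Cases.end();    // the end iterator has moved as well
      continue;
    }
    ++I; // nothing erased on this step: advance normally
  }
}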
- if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr; + if (i->getCaseSuccessor() != TheOnlyDest) + TheOnlyDest = nullptr; + + // Increment this iterator as we haven't removed the case. + ++i; } if (CI && !TheOnlyDest) { @@ -209,7 +214,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, if (SI->getNumCases() == 1) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. - SwitchInst::CaseIt FirstCase = SI->case_begin(); + auto FirstCase = *SI->case_begin(); Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), FirstCase.getCaseValue(), "cond"); @@ -287,7 +292,15 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, /// bool llvm::isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI) { - if (!I->use_empty() || isa<TerminatorInst>(I)) return false; + if (!I->use_empty()) + return false; + return wouldInstructionBeTriviallyDead(I, TLI); +} + +bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { + if (isa<TerminatorInst>(I)) + return false; // We don't want the landingpad-like instructions removed by anything this // general. @@ -307,7 +320,8 @@ bool llvm::isInstructionTriviallyDead(Instruction *I, return true; } - if (!I->mayHaveSideEffects()) return true; + if (!I->mayHaveSideEffects()) + return true; // Special case intrinsics that "may have side effects" but can be deleted // when dead. @@ -334,7 +348,8 @@ bool llvm::isInstructionTriviallyDead(Instruction *I, } } - if (isAllocLikeFn(I, TLI)) return true; + if (isAllocLikeFn(I, TLI)) + return true; if (CallInst *CI = isFreeCall(I, TLI)) if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) @@ -548,7 +563,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // that can be removed. BB->removePredecessor(Pred, true); - WeakVH PhiIt = &BB->front(); + WeakTrackingVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); Value *OldPhiIt = PhiIt; @@ -1023,17 +1038,15 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); - unsigned TrailZ = KnownZero.countTrailingOnes(); + KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT); + unsigned TrailZ = Known.countMinTrailingZeros(); // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ); // LLVM doesn't support alignments larger than this currently. Align = std::min(Align, +Value::MaximumAlignment); @@ -1069,17 +1082,17 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, } /// See if there is a dbg.value intrinsic for DIVar for the PHI node. 
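The KnownBits-based alignment computation above comes down to two clamped operations once the bit analysis is done. A standalone sketch of just the arithmetic, with TrailZ standing in for Known.countMinTrailingZeros() and BitWidth assumed nonzero:

#include <algorithm>
#include <climits>

unsigned alignFromTrailingZeros(unsigned TrailZ, unsigned BitWidth) {
  // Clamp so the shift stays defined even for extreme inputs, e.g. the
  // huge trailing-zero count computed from a null pointer.
  TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
  return 1u << std::min(BitWidth - 1, TrailZ);
}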
-static bool PhiHasDebugValue(DILocalVariable *DIVar, +static bool PhiHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, PHINode *APN) { // Since we can't guarantee that the original dbg.declare intrinsic // is removed by LowerDbgDeclare(), we need to make sure that we are // not inserting the same dbg.value intrinsic over and over. - DbgValueList DbgValues; - FindAllocaDbgValues(DbgValues, APN); - for (auto DVI : DbgValues) { - assert (DVI->getValue() == APN); - assert (DVI->getOffset() == 0); + SmallVector<DbgValueInst *, 1> DbgValues; + findDbgValues(DbgValues, APN); + for (auto *DVI : DbgValues) { + assert(DVI->getValue() == APN); + assert(DVI->getOffset() == 0); if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr)) return true; } @@ -1091,8 +1104,9 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); + auto *DIExpr = DDI->getExpression(); + Value *DV = SI->getOperand(0); // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1102,34 +1116,28 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) { - // We're now only describing a subset of the variable. The fragment we're - // describing will always be smaller than the variable size, because - // VariableSize == Size of Alloca described by DDI. Since SI stores - // to the alloca described by DDI, if it's first operand is an extend, - // we're guaranteed that before extension, the value was narrower than - // the size of the alloca, hence the size of the described variable. - SmallVector<uint64_t, 3> Ops; - unsigned FragmentOffset = 0; - // If this already is a bit fragment, we drop the bit fragment from the - // expression and record the offset. - auto Fragment = DIExpr->getFragmentInfo(); - if (Fragment) { - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); - FragmentOffset = Fragment->OffsetInBits; - } else { - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); + // If this DDI was already describing only a fragment of a variable, ensure + // that fragment is appropriately narrowed here. + // But if a fragment wasn't used, describe the value as the original + // argument (rather than the zext or sext) so that it remains described even + // if the sext/zext is optimized away. This widens the variable description, + // leaving it up to the consumer to know how the smaller value may be + // represented in a larger register.
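The fragment rewrite performed just below is mechanical when stated over plain vectors: strip the trailing three-element fragment descriptor, then append a new one whose size is the pre-extension type's size. A standalone sketch; kFragmentOp is a placeholder, not the real DWARF constant:

#include <cassert>
#include <cstdint>
#include <vector>

constexpr uint64_t kFragmentOp = 0x1000; // placeholder opcode value

std::vector<uint64_t> narrowFragment(const std::vector<uint64_t> &Expr,
                                     uint64_t OffsetInBits,
                                     uint64_t NarrowSizeInBits) {
  assert(Expr.size() >= 3 && "expression must end in [op, offset, size]");
  // Keep the leading operations, drop the old fragment suffix.
  std::vector<uint64_t> Ops(Expr.begin(), Expr.end() - 3);
  Ops.push_back(kFragmentOp);
  Ops.push_back(OffsetInBits);
  Ops.push_back(NarrowSizeInBits);
  return Ops;
}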
+ if (auto Fragment = DIExpr->getFragmentInfo()) { + unsigned FragmentOffset = Fragment->OffsetInBits; + SmallVector<uint64_t, 3> Ops(DIExpr->elements_begin(), + DIExpr->elements_end() - 3); + Ops.push_back(dwarf::DW_OP_LLVM_fragment); + Ops.push_back(FragmentOffset); + const DataLayout &DL = DDI->getModule()->getDataLayout(); + Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); + DIExpr = Builder.createExpression(Ops); } - Ops.push_back(dwarf::DW_OP_LLVM_fragment); - Ops.push_back(FragmentOffset); - const DataLayout &DL = DDI->getModule()->getDataLayout(); - Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); - auto NewDIExpr = Builder.createExpression(Ops); - if (!LdStHasDebugValue(DIVar, NewDIExpr, SI)) - Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr, - DDI->getDebugLoc(), SI); - } else if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, - DDI->getDebugLoc(), SI); + DV = ExtendedArg; + } + if (!LdStHasDebugValue(DIVar, DIExpr, SI)) + Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(), + SI); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value @@ -1152,7 +1160,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DbgValue->insertAfter(LI); } -/// Inserts a llvm.dbg.value intrinsic after a phi +/// Inserts a llvm.dbg.value intrinsic after a phi /// that has an associated llvm.dbg.decl intrinsic. void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, PHINode *APN, DIBuilder &Builder) { @@ -1214,13 +1222,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. - SmallVector<uint64_t, 1> NewDIExpr; - auto *DIExpr = DDI->getExpression(); - NewDIExpr.push_back(dwarf::DW_OP_deref); - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), - DIB.createExpression(NewDIExpr), - DDI->getDebugLoc(), CI); + DDI->getExpression(), DDI->getDebugLoc(), + CI); } } DDI->eraseFromParent(); @@ -1241,9 +1245,7 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { return nullptr; } -/// FindAllocaDbgValues - Finds the llvm.dbg.value intrinsics describing the -/// alloca 'V', if any. 
-void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) { +void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { if (auto *L = LocalAsMetadata::getIfExists(V)) if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) for (User *U : MDV->users()) @@ -1251,37 +1253,6 @@ void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) { - Expr.push_back(dwarf::DW_OP_deref); -} - -static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) { - if (Offset > 0) { - Expr.push_back(dwarf::DW_OP_plus); - Expr.push_back(Offset); - } else if (Offset < 0) { - Expr.push_back(dwarf::DW_OP_minus); - Expr.push_back(-Offset); - } -} - -static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder, - DIExpression *DIExpr, bool Deref, - int Offset) { - if (!Deref && !Offset) - return DIExpr; - // Create a copy of the original DIDescriptor for user variable, prepending - // "deref" operation to a list of address elements, as new llvm.dbg.declare - // will take a value storing address of the memory for variable, not - // alloca itself. - SmallVector<uint64_t, 4> NewDIExpr; - if (Deref) - DIExprAddDeref(NewDIExpr); - DIExprAddOffset(NewDIExpr, Offset); - if (DIExpr) - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); - return Builder.createExpression(NewDIExpr); -} bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, @@ -1293,9 +1264,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIVar = DDI->getVariable(); auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - - DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset); - + DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); // Insert llvm.dbg.declare immediately after the original alloca, and remove // old llvm.dbg.declare. Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1326,11 +1295,11 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, // Insert the offset immediately after the first deref. // We could just change the offset argument of dbg.value, but it's unsigned... if (Offset) { - SmallVector<uint64_t, 4> NewDIExpr; - DIExprAddDeref(NewDIExpr); - DIExprAddOffset(NewDIExpr, Offset); - NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); - DIExpr = Builder.createExpression(NewDIExpr); + SmallVector<uint64_t, 4> Ops; + Ops.push_back(dwarf::DW_OP_deref); + DIExpression::appendOffset(Ops, Offset); + Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); + DIExpr = Builder.createExpression(Ops); } Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr, @@ -1349,6 +1318,57 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, } } +void llvm::salvageDebugInfo(Instruction &I) { + SmallVector<DbgValueInst *, 1> DbgValues; + auto &M = *I.getModule(); + + auto MDWrap = [&](Value *V) { + return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); + }; + + if (isa<BitCastInst>(&I)) { + findDbgValues(DbgValues, &I); + for (auto *DVI : DbgValues) { + // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value + // to use the cast's source. 
+ DVI->setOperand(0, MDWrap(I.getOperand(0))); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + } + } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { + findDbgValues(DbgValues, &I); + for (auto *DVI : DbgValues) { + unsigned BitWidth = + M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); + APInt Offset(BitWidth, 0); + // Rewrite a constant GEP into a DIExpression. Since we are performing + // arithmetic to compute the variable's *value* in the DIExpression, we + // need to mark the expression with a DW_OP_stack_value. + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { + auto *DIExpr = DVI->getExpression(); + DIBuilder DIB(M, /*AllowUnresolved*/ false); + // GEP offsets are i32 and thus always fit into an int64_t. + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, + Offset.getSExtValue(), + DIExpression::WithStackValue); + DVI->setOperand(0, MDWrap(I.getOperand(0))); + DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + } + } + } else if (isa<LoadInst>(&I)) { + findDbgValues(DbgValues, &I); + for (auto *DVI : DbgValues) { + // Rewrite the load into DW_OP_deref. + auto *DIExpr = DVI->getExpression(); + DIBuilder DIB(M, /*AllowUnresolved*/ false); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); + DVI->setOperand(0, MDWrap(I.getOperand(0))); + DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + } + } +} + unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { unsigned NumDeadInst = 0; // Delete the instructions backwards, as it has a reduced likelihood of @@ -1450,7 +1470,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present, because it uses a WeakVH. + // updates the CallGraph if present, because it uses a WeakTrackingVH. CI->replaceAllUsesWith(II); // Delete the original call @@ -1642,9 +1662,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { TI->eraseFromParent(); } -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. +/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo +/// after modifying the CFG. bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet<BasicBlock*, 16> Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -1723,12 +1744,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, // Preserve !invariant.group in K. 
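As an aside on the GEP salvage above: folding a constant-offset address computation into the location expression amounts to prepending offset arithmetic and marking the result as a computed value rather than a memory location. A standalone sketch with illustrative placeholder opcodes (not DWARF constants):

#include <cstdint>
#include <vector>

enum : uint64_t { kPlus = 1, kMinus = 2, kStackValue = 3 }; // placeholders

std::vector<uint64_t> salvageConstantOffset(const std::vector<uint64_t> &Expr,
                                            int64_t Offset) {
  std::vector<uint64_t> Ops;
  if (Offset > 0) {
    Ops.push_back(kPlus);
    Ops.push_back(static_cast<uint64_t>(Offset));
  } else if (Offset < 0) {
    Ops.push_back(kMinus);
    Ops.push_back(static_cast<uint64_t>(-Offset));
  }
  Ops.insert(Ops.end(), Expr.begin(), Expr.end());
  Ops.push_back(kStackValue); // the expression now computes a value,
                              // not an address
  return Ops;
}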
break; case LLVMContext::MD_align: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; case LLVMContext::MD_dereferenceable: case LLVMContext::MD_dereferenceable_or_null: - K->setMetadata(Kind, + K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; } @@ -1755,46 +1776,62 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) { combineMetadata(K, J, KnownIDs); } -unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, - DominatorTree &DT, - const BasicBlockEdge &Root) { +template <typename RootType, typename DominatesFn> +static unsigned replaceDominatedUsesWith(Value *From, Value *To, + const RootType &Root, + const DominatesFn &Dominates) { assert(From->getType() == To->getType()); - + unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); - UI != UE; ) { + UI != UE;) { Use &U = *UI++; - if (DT.dominates(Root, U)) { - U.set(To); - DEBUG(dbgs() << "Replace dominated use of '" - << From->getName() << "' as " - << *To << " in " << *U << "\n"); - ++Count; - } + if (!Dominates(Root, U)) + continue; + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; } return Count; } -unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, - DominatorTree &DT, - const BasicBlock *BB) { - assert(From->getType() == To->getType()); +unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) { + assert(From->getType() == To->getType()); + auto *BB = From->getParent(); + unsigned Count = 0; - unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE;) { Use &U = *UI++; auto *I = cast<Instruction>(U.getUser()); - if (DT.properlyDominates(BB, I->getParent())) { - U.set(To); - DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " - << *To << " in " << *U << "\n"); - ++Count; - } + if (I->getParent() == BB) + continue; + U.set(To); + ++Count; } return Count; } +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlockEdge &Root) { + auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) { + return DT.dominates(Root, U); + }; + return ::replaceDominatedUsesWith(From, To, Root, Dominates); +} + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlock *BB) { + auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) { + auto *I = cast<Instruction>(U.getUser())->getParent(); + return DT.properlyDominates(BB, I); + }; + return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates); +} + bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { // Check if the function is specifically marked as a gc leaf function. if (CS.hasFnAttr("gc-leaf-function")) @@ -1812,6 +1849,49 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { return false; } +void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, + LoadInst &NewLI) { + auto *NewTy = NewLI.getType(); + + // This only directly applies if the new type is also a pointer. + if (NewTy->isPointerTy()) { + NewLI.setMetadata(LLVMContext::MD_nonnull, N); + return; + } + + // The only other translation we can do is to integral loads with !range + // metadata. 
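The translation above relies on one property of range metadata: a wrapped half-open range can exclude a single value, so "non-null" becomes [1, 0), which covers every integer except zero. A standalone sketch of that containment rule:

#include <cstdint>

struct HalfOpenRange {
  uint64_t Lo, Hi; // [Lo, Hi); Lo > Hi means the range wraps around
};

// "Non-null" expressed as a range: everything except zero.
constexpr HalfOpenRange NonNull{1, 0};

bool contains(const HalfOpenRange &R, uint64_t V) {
  return R.Lo <= R.Hi ? (V >= R.Lo && V < R.Hi)
                      : (V >= R.Lo || V < R.Hi);
}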
+  if (!NewTy->isIntegerTy())
+    return;
+
+  MDBuilder MDB(NewLI.getContext());
+  const Value *Ptr = OldLI.getPointerOperand();
+  auto *ITy = cast<IntegerType>(NewTy);
+  auto *NullInt = ConstantExpr::getPtrToInt(
+      ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+  auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+  NewLI.setMetadata(LLVMContext::MD_range,
+                    MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+                             MDNode *N, LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // Give up unless it is converted to a pointer where there is a single very
+  // valuable mapping we can do reliably.
+  // FIXME: It would be nice to propagate this in more ways, but the type
+  // conversions make it hard.
+  if (!NewTy->isPointerTy())
+    return;
+
+  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+    MDNode *NN = MDNode::get(OldLI.getContext(), None);
+    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  }
+}
+
 namespace {
 /// A potential constituent of a bitreverse or bswap expression. See
 /// collectBitParts for a fuller explanation.
@@ -1933,7 +2013,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
     unsigned NumMaskedBits = AndMask.countPopulation();
     if (!MatchBitReversals && NumMaskedBits % 8 != 0)
       return Result;
-
+
     auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, MatchBitReversals,
                                 BPS);
     if (!Res)
@@ -2068,9 +2148,63 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
 void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
     CallInst *CI, const TargetLibraryInfo *TLI) {
   Function *F = CI->getCalledFunction();
-  LibFunc::Func Func;
+  LibFunc Func;
   if (F && !F->hasLocalLinkage() && F->hasName() &&
       TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
       !F->doesNotAccessMemory())
-    CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin);
+    CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
+}
+
+bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
+  // We can't have a PHI with a metadata type.
+  if (I->getOperand(OpIdx)->getType()->isMetadataTy())
+    return false;
+
+  // Early exit.
+  if (!isa<Constant>(I->getOperand(OpIdx)))
+    return true;
+
+  switch (I->getOpcode()) {
+  default:
+    return true;
+  case Instruction::Call:
+  case Instruction::Invoke:
+    // Can't handle inline asm. Skip it.
+    if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue()))
+      return false;
+    // Many arithmetic intrinsics have no issue taking a
+    // variable; however, it's hard to distinguish these from
+    // specials such as @llvm.frameaddress that require a constant.
+    if (isa<IntrinsicInst>(I))
+      return false;
+
+    // Constant bundle operands may need to retain their constant-ness for
+    // correctness.
+    if (ImmutableCallSite(I).isBundleOperand(OpIdx))
+      return false;
+    return true;
+  case Instruction::ShuffleVector:
+    // Shufflevector masks are constant.
+    return OpIdx != 2;
+  case Instruction::Switch:
+  case Instruction::ExtractValue:
+    // All operands apart from the first are constant.
+    return OpIdx == 0;
+  case Instruction::InsertValue:
+    // All operands apart from the first and the second are constant.
+    return OpIdx < 2;
+  case Instruction::Alloca:
+    // Static allocas (constant size in the entry block) are handled by
+    // prologue/epilogue insertion so they're free anyway. We definitely don't
+    // want to make them non-constant.
+ return !dyn_cast<AllocaInst>(I)->isStaticAlloca(); + case Instruction::GetElementPtr: + if (OpIdx == 0) + return true; + gep_type_iterator It = gep_type_begin(I); + for (auto E = std::next(It, OpIdx); It != E; ++It) + if (It.isStruct()) + return false; + return true; + } } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 00cda2a..e21e34d 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -38,15 +38,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -65,6 +64,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -72,7 +72,6 @@ using namespace llvm; #define DEBUG_TYPE "loop-simplify" -STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); STATISTIC(NumNested , "Number of nested loops split out"); // If the block isn't already, move the new block to right after some 'outside @@ -152,37 +151,6 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, return PreheaderBB; } -/// \brief Ensure that the loop preheader dominates all exit blocks. -/// -/// This method is used to split exit blocks that have predecessors outside of -/// the loop. -static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, - DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA) { - SmallVector<BasicBlock*, 8> LoopBlocks; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { - BasicBlock *P = *I; - if (L->contains(P)) { - // Don't do this if the loop is exited via an indirect branch. - if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; - - LoopBlocks.push_back(P); - } - } - - assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewExitBB = nullptr; - - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI, - PreserveLCSSA); - if (!NewExitBB) - return nullptr; - - DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " - << NewExitBB->getName() << "\n"); - return NewExitBB; -} - /// Add the specified block, and all of its predecessors, to the specified set, /// if it's not already in there. Stop predecessor traversal when we reach /// StopBlock. @@ -210,7 +178,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { // This is a degenerate PHI already, don't modify it! 
PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -346,16 +314,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, // Split edges to exit blocks from the inner loop, if they emerged in the // process of separating the outer one. - SmallVector<BasicBlock *, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (BasicBlock *ExitBlock : ExitBlockSet) { - if (any_of(predecessors(ExitBlock), - [L](BasicBlock *BB) { return !L->contains(BB); })) { - rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); - } - } + formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); if (PreserveLCSSA) { // Fix LCSSA form for L. Some values, which previously were only used inside @@ -563,29 +522,16 @@ ReprocessLoop: BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); - if (Preheader) { - ++NumInserted; + if (Preheader) Changed = true; - } } // Next, check to make sure that all exit nodes of the loop only have // predecessors that are inside of the loop. This check guarantees that the // loop preheader/header will dominate the exit blocks. If the exit block has // predecessors from outside of the loop, split the edge now. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - - SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (BasicBlock *ExitBlock : ExitBlockSet) { - if (any_of(predecessors(ExitBlock), - [L](BasicBlock *BB) { return !L->contains(BB); })) { - rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA); - ++NumInserted; - Changed = true; - } - } + if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA)) + Changed = true; // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. @@ -614,10 +560,8 @@ ReprocessLoop: // insert a new block that all backedges target, then make it jump to the // loop header. LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); - if (LoopLatch) { - ++NumInserted; + if (LoopLatch) Changed = true; - } } const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); @@ -628,7 +572,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { if (SE) SE->forgetValue(PN); if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { PN->replaceAllUsesWith(V); @@ -645,14 +589,22 @@ ReprocessLoop: // loop-invariant instructions out of the way to open up more // opportunities, and the disadvantage of having the responsibility // to preserve dominator information. 
-    bool UniqueExit = true;
-    if (!ExitBlocks.empty())
-      for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
-        if (ExitBlocks[i] != ExitBlocks[0]) {
-          UniqueExit = false;
-          break;
+    auto HasUniqueExitBlock = [&]() {
+      BasicBlock *UniqueExit = nullptr;
+      for (auto *ExitingBB : ExitingBlocks)
+        for (auto *SuccBB : successors(ExitingBB)) {
+          if (L->contains(SuccBB))
+            continue;
+
+          if (!UniqueExit)
+            UniqueExit = SuccBB;
+          else if (UniqueExit != SuccBB)
+            return false;
         }
-    if (UniqueExit) {
+
+      return true;
+    };
+    if (HasUniqueExitBlock()) {
       for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
         BasicBlock *ExitingBlock = ExitingBlocks[i];
         if (!ExitingBlock->getSinglePredecessor()) continue;
@@ -735,6 +687,17 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
                         bool PreserveLCSSA) {
   bool Changed = false;
+#ifndef NDEBUG
+  // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
+  // form.
+  if (PreserveLCSSA) {
+    assert(DT && "DT not available.");
+    assert(LI && "LI not available.");
+    assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+           "Requested to preserve LCSSA, but it's already broken.");
+  }
+#endif
+
   // Worklist maintains our depth-first queue of loops in this nest to process.
   SmallVector<Loop *, 4> Worklist;
   Worklist.push_back(L);
@@ -814,15 +777,6 @@ bool LoopSimplify::runOnFunction(Function &F) {
       &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-#ifndef NDEBUG
-  if (PreserveLCSSA) {
-    assert(DT && "DT not available.");
-    assert(LI && "LI not available.");
-    bool InLCSSA = all_of(
-        *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
-    assert(InLCSSA && "Requested to preserve LCSSA, but it's already broken.");
-  }
-#endif
   // Simplify each loop nest in the function.
   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
@@ -846,17 +800,14 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
   ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
   AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
-  // FIXME: This pass should verify that the loops on which it's operating
-  // are in canonical SSA form, and that the pass itself preserves this form.
+  // Note that we don't preserve LCSSA in the new PM; if you need it, run LCSSA
+  // after simplifying the loops.
   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
-    Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */);
-
-  // FIXME: We need to invalidate this to avoid PR28400. Is there a better
-  // solution?
-  AM.invalidate<ScalarEvolutionAnalysis>(F);
+    Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
   if (!Changed)
     return PreservedAnalyses::all();
+
   PreservedAnalyses PA;
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<LoopAnalysis>();
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index e346ebd..f2527f8 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,7 +16,6 @@
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -27,6 +26,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
@@ -38,6 +38,7 @@
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 using namespace llvm;
 #define DEBUG_TYPE "loop-unroll"
@@ -51,6 +52,16 @@
 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
                     cl::desc("Allow runtime unrolled loops to be unrolled "
                              "with epilog instead of prolog."));
+static cl::opt<bool>
+UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
+                    cl::desc("Verify domtree after unrolling"),
+#ifdef NDEBUG
+    cl::init(false)
+#else
+    cl::init(true)
+#endif
+                    );
+
 /// Convert the instruction operands from referencing the current values into
 /// those specified by VMap.
 static inline void remapInstruction(Instruction *I,
@@ -205,6 +216,45 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
   }
 }
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitable.
+/// Epilog unroll is more profitable when there is a PHI that starts from a
+/// constant. In this case the epilog will leave the PHI starting from a
+/// constant, but the prolog will convert it to a non-constant.
+///
+/// loop:
+///   PN = PHI [I, Latch], [CI, PreHeader]
+///   I = foo(PN)
+///   ...
+///
+/// Epilog unroll case.
+/// loop:
+///   PN = PHI [I2, Latch], [CI, PreHeader]
+///   I1 = foo(PN)
+///   I2 = foo(I1)
+///   ...
+/// Prolog unroll case.
+///   NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+///   PN = PHI [I2, Latch], [NewPN, PreHeader]
+///   I1 = foo(PN)
+///   I2 = foo(I1)
+///   ...
+///
+static bool isEpilogProfitable(Loop *L) {
+  BasicBlock *PreHeader = L->getLoopPreheader();
+  BasicBlock *Header = L->getHeader();
+  assert(PreHeader && Header);
+  for (Instruction &BBI : *Header) {
+    PHINode *PN = dyn_cast<PHINode>(&BBI);
+    if (!PN)
+      break;
+    if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
+      return true;
+  }
+  return false;
+}
+
 /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
 /// if unrolling was successful, or false if the loop was unmodified. Unrolling
 /// can only fail when the loop's latch block is not terminated by a conditional
@@ -268,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     return false;
   }
+  // The current loop unroll pass can only unroll loops with a single latch
+  // that's a conditional branch exiting the loop.
+  // FIXME: The implementation can be extended to work with more complicated
+  // cases, e.g.
 loops with multiple latches.
   BasicBlock *Header = L->getHeader();
   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
@@ -278,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     return false;
   }
+  auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
+    return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+  };
+
+  if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
+    DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
+                    " exiting the loop can be unrolled\n");
+    return false;
+  }
+
   if (Header->hasAddressTaken()) {
     // The loop-rotate pass can be helpful to avoid this in many cases.
     DEBUG(dbgs() <<
@@ -296,8 +360,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     Count = TripCount;
   // Don't enter the unroll code if there is nothing to do.
-  if (TripCount == 0 && Count < 2 && PeelCount == 0)
+  if (TripCount == 0 && Count < 2 && PeelCount == 0) {
+    DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
     return false;
+  }
   assert(Count > 0);
   assert(TripMultiple > 0);
@@ -330,7 +396,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
          "and peeling for the same loop");
   if (PeelCount)
-    peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
+    peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
   // Loops containing convergent instructions must have a count that divides
   // their TripMultiple.
@@ -346,14 +412,22 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
            "convergent operation.");
   });
+  bool EpilogProfitability =
+      UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
+                                              : isEpilogProfitable(L);
+
   if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
-                                  UnrollRuntimeEpilog, LI, SE, DT,
+                                  EpilogProfitability, LI, SE, DT,
                                   PreserveLCSSA)) {
     if (Force)
       RuntimeTripCount = false;
-    else
+    else {
+      DEBUG(
+          dbgs() << "Won't unroll; remainder loop could not be generated "
                     "when assuming runtime trip count\n");
       return false;
+    }
   }
   // Notify ScalarEvolution that the loop will be substantially changed,
@@ -446,6 +520,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     for (Loop *SubLoop : *L)
       LoopsToSimplify.insert(SubLoop);
+  if (Header->getParent()->isDebugInfoForProfiling())
+    for (BasicBlock *BB : L->getBlocks())
+      for (Instruction &I : *BB)
+        if (const DILocation *DIL = I.getDebugLoc())
+          I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+
   for (unsigned It = 1; It != Count; ++It) {
     std::vector<BasicBlock*> NewBlocks;
     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -456,19 +536,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
       Header->getParent()->getBasicBlockList().push_back(New);
+      assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
+             "Header should not be in a sub-loop");
       // Tell LI about New.
-      if (*BB == Header) {
-        assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
-        L->addBasicBlockToLoop(New, *LI);
-      } else {
-        const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
-        if (OldLoop) {
-          LoopsToSimplify.insert(NewLoops[OldLoop]);
+      const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
+      if (OldLoop) {
+        LoopsToSimplify.insert(NewLoops[OldLoop]);
-          // Forget the old loop, since its inputs may have changed.
- if (SE) - SE->forgetLoop(OldLoop); - } + // Forget the old loop, since its inputs may have changed. + if (SE) + SE->forgetLoop(OldLoop); } if (*BB == Header) @@ -615,14 +692,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, Term->eraseFromParent(); } } + // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied - // iterations too. Thus, the new idom of the block will be the nearest - // common dominator of the previous idom and common dominator of all copies of - // the previous idom. This is equivalent to the nearest common dominator of - // the previous idom and the first latch, which dominates all copies of the - // previous idom. + // iterations too. if (DT && Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); @@ -632,12 +706,38 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (!L->contains(ChildBB)) ChildrenToUpdate.push_back(ChildBB); } - BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]); + BasicBlock *NewIDom; + if (BB == LatchBlock) { + // The latch is special because we emit unconditional branches in + // some cases where the original loop contained a conditional branch. + // Since the latch is always at the bottom of the loop, if the latch + // dominated an exit before unrolling, the new dominator of that exit + // must also be a latch. Specifically, the dominator is the first + // latch which ends in a conditional branch, or the last latch if + // there is no such latch. + NewIDom = Latches.back(); + for (BasicBlock *IterLatch : Latches) { + TerminatorInst *Term = IterLatch->getTerminator(); + if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) { + NewIDom = IterLatch; + break; + } + } + } else { + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); + } for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } } + if (DT && UnrollVerifyDomtree) + DT->verifyDomTree(); + // Merge adjacent basic blocks, if possible. SmallPtrSet<Loop *, 4> ForgottenLoops; for (BasicBlock *Latch : Latches) { @@ -655,16 +755,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } } - // FIXME: We only preserve DT info for complete unrolling now. Incrementally - // updating domtree after partial loop unrolling should also be easy. - if (DT && !CompletelyUnroll) - DT->recalculate(*L->getHeader()->getParent()); - else if (DT) - DEBUG(DT->verifyDomTree()); - // Simplify any new induction variables in the partially unrolled loop. 
 if (SE && !CompletelyUnroll && Count > 1) {
-    SmallVector<WeakVH, 16> DeadInsts;
+    SmallVector<WeakTrackingVH, 16> DeadInsts;
     simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
     // Aggressively clean up dead instructions that simplifyLoopIVs already
@@ -684,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
       Instruction *Inst = &*I++;
-      if (Value *V = SimplifyInstruction(Inst, DL))
+      if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
         if (LI->replacementPreservesLCSSAForm(Inst, V))
           Inst->replaceAllUsesWith(V);
       if (isInstructionTriviallyDead(Inst))
@@ -721,29 +814,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
   // at least one layer outside of the loop that was unrolled so that any
   // changes to the parent loop exposed by the unrolling are considered.
   if (DT) {
-    if (!OuterL && !CompletelyUnroll)
-      OuterL = L;
     if (OuterL) {
       // OuterL includes all loops for which we can break loop-simplify, so
       // it's sufficient to simplify only it (it'll recursively simplify inner
       // loops too).
+      if (NeedToFixLCSSA) {
+        // LCSSA must be performed on the outermost affected loop. The unrolled
+        // loop's last loop latch is guaranteed to be in the outermost loop
+        // after LoopInfo's been updated by markAsRemoved.
+        Loop *LatchLoop = LI->getLoopFor(Latches.back());
+        Loop *FixLCSSALoop = OuterL;
+        if (!FixLCSSALoop->contains(LatchLoop))
+          while (FixLCSSALoop->getParentLoop() != LatchLoop)
+            FixLCSSALoop = FixLCSSALoop->getParentLoop();
+
+        formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
+      } else if (PreserveLCSSA) {
+        assert(OuterL->isLCSSAForm(*DT) &&
+               "Loops should be in LCSSA form after loop-unroll.");
+      }
+
       // TODO: That potentially might be compile-time expensive. We should try
       // to fix the loop-simplified form incrementally.
       simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
-
-      // LCSSA must be performed on the outermost affected loop. The unrolled
-      // loop's last loop latch is guaranteed to be in the outermost loop after
-      // LoopInfo's been updated by markAsRemoved.
-      Loop *LatchLoop = LI->getLoopFor(Latches.back());
-      if (!OuterL->contains(LatchLoop))
-        while (OuterL->getParentLoop() != LatchLoop)
-          OuterL = OuterL->getParentLoop();
-
-      if (NeedToFixLCSSA)
-        formLCSSARecursively(*OuterL, *DT, LI, SE);
-      else
-        assert(OuterL->isLCSSAForm(*DT) &&
-               "Loops should be in LCSSA form after loop-unroll.");
     } else {
       // Simplify loops for which we might've broken loop-simplify form.
       for (Loop *SubLoop : LoopsToSimplify)
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 842cf31..5c21490 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <algorithm>
@@ -45,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeelCount(
     "unroll-force-peel-count", cl::init(0), cl::Hidden,
     cl::desc("Force a peel count regardless of profiling information."));
+// Designates that a Phi is estimated to become invariant after an "infinite"
+// number of loop iterations (i.e. it may only become invariant if the loop is
+// fully unrolled).
+static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+
 // Check whether we are capable of peeling this loop.
 static bool canPeel(Loop *L) {
   // Make sure the loop is in simplified form
@@ -55,12 +61,72 @@ static bool canPeel(Loop *L) {
   if (!L->getExitingBlock() || !L->getUniqueExitBlock())
     return false;
+  // Don't try to peel loops where the latch is not the exiting block.
+  // This can be an indication of two different things:
+  // 1) The loop is not rotated.
+  // 2) The loop contains irreducible control flow that involves the latch.
+  if (L->getLoopLatch() != L->getExitingBlock())
+    return false;
+
   return true;
 }
+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memoized in the map. The
+// function (abbreviated I) is calculated according to the following definition:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+//   If %y is a loop invariant, then I(%x) = 1.
+//   If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+//   Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+//       loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+//       looks like:
+//         %x = phi(0, %a),  <-- becomes invariant starting from 3rd iteration.
+//         %y = phi(0, 5),
+//         %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+    PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+    SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+  assert(Phi->getParent() == L->getHeader() &&
+         "Non-loop Phi should not be checked for turning into invariant.");
+  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+  // If we already know the answer, take it from the map.
+  auto I = IterationsToInvariance.find(Phi);
+  if (I != IterationsToInvariance.end())
+    return I->second;
+
+  // Otherwise we need to analyze the input from the back edge.
+  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+  // Place infinity in the map to avoid infinite recursion for cyclic Phis.
+  // Such cycles can never settle on an invariant.
+  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+  unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+  if (L->isLoopInvariant(Input))
+    ToInvariance = 1u;
+  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+    // Only consider Phis in the header block.
+    if (IncPhi->getParent() != L->getHeader())
+      return InfiniteIterationsToInvariance;
+    // If the input becomes an invariant after X iterations, then our Phi
+    // becomes an invariant after X + 1 iterations.
+    unsigned InputToInvariance = calculateIterationsToInvariance(
+        IncPhi, L, BackEdge, IterationsToInvariance);
+    if (InputToInvariance != InfiniteIterationsToInvariance)
+      ToInvariance = InputToInvariance + 1u;
+  }
+
+  // If we found that this Phi lies in an invariant chain, update the map.
+  if (ToInvariance != InfiniteIterationsToInvariance)
+    IterationsToInvariance[Phi] = ToInvariance;
+  return ToInvariance;
+}
+
 // Return the number of iterations we want to peel off.
 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
-                            TargetTransformInfo::UnrollingPreferences &UP) {
+                            TargetTransformInfo::UnrollingPreferences &UP,
+                            unsigned &TripCount) {
+  assert(LoopSize > 0 && "Zero loop size is not allowed!");
   UP.PeelCount = 0;
   if (!canPeel(L))
     return;
@@ -69,6 +135,46 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   if (!L->empty())
     return;
+  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+  // iterations of the loop. For this we compute the number of iterations after
+  // which every Phi is guaranteed to become an invariant, and try to peel the
+  // maximum number of iterations among these values, thus turning all those
+  // Phis into invariants.
+  // First, check that we can peel at least one iteration.
+  if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+    // Store the pre-calculated values here.
+    SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+    // Now go through all Phis to calculate the number of iterations they
+    // need to become invariants.
+    unsigned DesiredPeelCount = 0;
+    BasicBlock *BackEdge = L->getLoopLatch();
+    assert(BackEdge && "Loop is not in simplified form?");
+    for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+      PHINode *Phi = cast<PHINode>(&*BI);
+      unsigned ToInvariance = calculateIterationsToInvariance(
+          Phi, L, BackEdge, IterationsToInvariance);
+      if (ToInvariance != InfiniteIterationsToInvariance)
+        DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
+    }
+    if (DesiredPeelCount > 0) {
+      // Pay respect to limitations implied by loop size and the max peel count.
+      unsigned MaxPeelCount = UnrollPeelMaxCount;
+      MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+      DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+      // Consider max peel count limitation.
+      assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+      DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
+                   << " some Phis into invariants.\n");
+      UP.PeelCount = DesiredPeelCount;
+      return;
+    }
+  }
+
+  // Bail if we know the statically calculated trip count.
+  // In this case we prefer partial unrolling.
+  if (TripCount)
+    return;
+
   // If the user provided a peel count, use that.
   bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
   if (UserPeelCount) {
@@ -164,7 +270,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
                             BasicBlock *InsertBot, BasicBlock *Exit,
                             SmallVectorImpl<BasicBlock *> &NewBlocks,
                             LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
-                            ValueToValueMapTy &LVMap, LoopInfo *LI) {
+                            ValueToValueMapTy &LVMap, DominatorTree *DT,
+                            LoopInfo *LI) {
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
@@ -185,6 +292,17 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
       ParentLoop->addBasicBlockToLoop(NewBB, *LI);
     VMap[*BB] = NewBB;
+
+    // If the dominator tree is available, insert nodes to represent the
+    // cloned blocks.
+    if (DT) {
+      if (Header == *BB)
+        DT->addNewBlock(NewBB, InsertTop);
+      else {
+        DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
+        // VMap must contain entry for IDom, as the iteration order is RPO.
+        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
+      }
+    }
   }
   // Hook-up the control flow for the newly inserted blocks.
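
An editor's aside, not part of the patch: the iterations-to-invariance heuristic above is easiest to see on the phi-of-phi case it directly supports, here modeled as a scalar C++ sketch (names and the `Use` callback are illustrative only).

    #include <cstdio>

    static void Use(int V) { std::printf("%d\n", V); }

    // Models the supported case in calculateIterationsToInvariance:
    //   %y = phi [0, preheader], [5, latch]   -> I(%y) = 1
    //   %x = phi [0, preheader], [%y, latch]  -> I(%x) = I(%y) + 1 = 2
    // Peeling two iterations leaves X == 5 and Y == 5 invariant in the
    // remaining loop, which is exactly what computePeelCount aims for.
    void example(int N) {
      int X = 0, Y = 0;
      for (int I = 0; I < N; ++I) {
        Use(X); // sees 0, 0, 5, 5, 5, ... : invariant once two
                // iterations are peeled off
        X = Y;  // backedge input of %x is the header phi %y
        Y = 5;  // backedge input of %y is loop-invariant
      }
    }

Under this reading, DesiredPeelCount would be max(I(%x), I(%y)) = 2, subject to the size and UnrollPeelMaxCount limits checked above.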
@@ -198,11 +316,13 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
   // The backedge now goes to the "bottom", which is either the loop's real
   // header (for the last peeled iteration) or the copied header of the next
   // iteration (for every other iteration)
-  BranchInst *LatchBR =
-      cast<BranchInst>(cast<BasicBlock>(VMap[Latch])->getTerminator());
+  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
   unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
   LatchBR->setSuccessor(HeaderIdx, InsertBot);
   LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+  if (DT)
+    DT->changeImmediateDominator(InsertBot, NewLatch);
   // The new copy of the loop body starts with a bunch of PHI nodes
   // that pick an incoming value from either the preheader, or the previous
@@ -257,7 +377,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
 /// optimizations.
 bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
                     ScalarEvolution *SE, DominatorTree *DT,
-                    bool PreserveLCSSA) {
+                    AssumptionCache *AC, bool PreserveLCSSA) {
   if (!canPeel(L))
     return false;
@@ -358,7 +478,24 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
       CurHeaderWeight = 1;
     cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
-                    NewBlocks, LoopBlocks, VMap, LVMap, LI);
+                    NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+
+    // Remap to use values from the current iteration instead of the
+    // previous one.
+    remapInstructionsInBlocks(NewBlocks, VMap);
+
+    if (DT) {
+      // Latches of the cloned loops dominate the loop exit, so the idom of
+      // the latter is the first cloned loop body, just as the original
+      // PreHeader dominates the original loop body.
+      if (Iter == 0)
+        DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+#ifndef NDEBUG
+      if (VerifyDomInfo)
+        DT->verifyDomTree();
+#endif
+    }
+
     updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
                         PeelCount, ExitWeight);
@@ -369,10 +506,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     F->getBasicBlockList().splice(InsertTop->getIterator(),
                                   F->getBasicBlockList(),
                                   NewBlocks[0]->getIterator(), F->end());
-
-    // Remap to use values from the current iteration instead of the
-    // previous one.
-    remapInstructionsInBlocks(NewBlocks, VMap);
   }
   // Now adjust the phi nodes in the loop header to get their initial values
@@ -405,9 +538,16 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   }
   // If the loop is nested, we changed the parent loop, update SE.
- if (Loop *ParentLoop = L->getParentLoop()) + if (Loop *ParentLoop = L->getParentLoop()) { SE->forgetLoop(ParentLoop); + // FIXME: Incrementally update loop-simplify + simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA); + } else { + // FIXME: Incrementally update loop-simplify + simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA); + } + NumPeeled++; return true; diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index d3ea156..d43ce7a 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,8 +21,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -37,6 +37,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" #include <algorithm> using namespace llvm; @@ -45,6 +47,10 @@ using namespace llvm; STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); +static cl::opt<bool> UnrollRuntimeMultiExit( + "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, + cl::desc("Allow runtime unrolling for loops with multiple exits, when " + "epilog is generated")); /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the @@ -60,9 +66,11 @@ STATISTIC(NumRuntimeUnrolled, /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, - BasicBlock *PrologExit, BasicBlock *PreHeader, - BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *PrologExit, + BasicBlock *OriginalLoopLatchExit, + BasicBlock *PreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); @@ -137,15 +145,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); - BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees - SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); + SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); + B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); } /// Connect the unrolling epilog code to the original loop. 
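
To make the prolog/epilog remainder split described here concrete, the following is an editor's C++ sketch (not from the patch) of what runtime unrolling by a factor of 4 produces; `Body` stands in for the loop body.

    // Epilog remainder: run the unrolled loop first, then up to
    // Count-1 = 3 leftover iterations afterwards.
    void runUnrolledEpilog(unsigned N, void (*Body)(unsigned)) {
      unsigned I = 0;
      for (; I + 4 <= N; I += 4) { // unrolled main loop
        Body(I); Body(I + 1); Body(I + 2); Body(I + 3);
      }
      for (; I < N; ++I) // epilog: runs N % 4 iterations
        Body(I);
    }

    // Prolog remainder: peel N % 4 iterations first, then enter the
    // unrolled loop.
    void runUnrolledProlog(unsigned N, void (*Body)(unsigned)) {
      unsigned I = 0;
      for (; I < N % 4; ++I) // prolog: runs N % 4 iterations
        Body(I);
      for (; I + 4 <= N; I += 4) { // unrolled main loop
        Body(I); Body(I + 1); Body(I + 2); Body(I + 3);
      }
    }

This also illustrates why isEpilogProfitable prefers the epilog: with a prolog, the main loop's starting values come out of the prolog instead of staying constant.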
@@ -260,13 +268,20 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); - // Split the exit to maintain loop canonicalization guarantees + // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, NewExit); + + // Split the main loop exit to maintain canonicalization guarantees. + SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; + SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, + PreserveLCSSA); } /// Create a clone of the blocks in a loop and connect them together. @@ -276,35 +291,23 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. -/// -static void CloneLoopBlocks(Loop *L, Value *NewIter, - const bool CreateRemainderLoop, - const bool UseEpilogRemainder, - BasicBlock *InsertTop, BasicBlock *InsertBot, - BasicBlock *Preheader, - std::vector<BasicBlock *> &NewBlocks, - LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - LoopInfo *LI) { +/// Return the new cloned loop that is created when CreateRemainderLoop is true. +static Loop * +CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, + const bool UseEpilogRemainder, BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Preheader, + std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); - Loop *NewLoop = nullptr; Loop *ParentLoop = L->getParentLoop(); - if (CreateRemainderLoop) { - NewLoop = new Loop(); - if (ParentLoop) - ParentLoop->addChildLoop(NewLoop); - else - LI->addTopLevelLoop(NewLoop); - } - NewLoopsMap NewLoops; - if (NewLoop) - NewLoops[L] = NewLoop; - else if (ParentLoop) + NewLoops[ParentLoop] = ParentLoop; + if (!CreateRemainderLoop) NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, @@ -312,7 +315,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); - + // If we're unrolling the outermost loop, there's no remainder loop, // and this block isn't in a nested loop, then the new block is not // in any loop. Otherwise, add it to loopinfo. @@ -326,6 +329,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, InsertTop->getTerminator()->setSuccessor(0, NewBB); } + if (DT) { + if (Header == *BB) { + // The header is dominated by the preheader. + DT->addNewBlock(NewBB, InsertTop); + } else { + // Copy information from original loop to unrolled loop. 
+        BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
+        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+      }
+    }
+
     if (Latch == *BB) {
       // For the last block, if CreateRemainderLoop is false, create a direct
       // jump to InsertBot. If not, create a loop back to cloned head.
@@ -376,7 +390,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
         NewPHI->setIncomingValue(idx, V);
       }
     }
-  if (NewLoop) {
+  if (CreateRemainderLoop) {
+    Loop *NewLoop = NewLoops[L];
+    assert(NewLoop && "L should have been cloned");
     // Add unroll disable metadata to disable future unrolling for this loop.
     SmallVector<Metadata *, 4> MDs;
     // Reserve first location for self reference to the LoopID metadata node.
@@ -406,9 +422,56 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
     // Set operand 0 to refer to the loop id itself.
     NewLoopID->replaceOperandWith(0, NewLoopID);
     NewLoop->setLoopID(NewLoopID);
+    return NewLoop;
   }
+  else
+    return nullptr;
+}
+
+/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
+/// is populated with all the loop exit blocks other than the LatchExit block.
+static bool
+canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
+                             BasicBlock *LatchExit, bool PreserveLCSSA,
+                             bool UseEpilogRemainder) {
+
+  // Support runtime unrolling for multiple exit blocks and multiple exiting
+  // blocks.
+  if (!UnrollRuntimeMultiExit)
+    return false;
+  // Even if runtime multi exit is enabled, we currently have some correctness
+  // constraints in unrolling a multi-exit loop.
+  // We rely on LCSSA form being preserved when the exit blocks are transformed.
+  if (!PreserveLCSSA)
+    return false;
+  SmallVector<BasicBlock *, 4> Exits;
+  L->getUniqueExitBlocks(Exits);
+  for (auto *BB : Exits)
+    if (BB != LatchExit)
+      OtherExits.push_back(BB);
+
+  // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
+  // UnrollRuntimeMultiExit is true. This will require updating the logic in
+  // connectEpilog/connectProlog.
+  if (!LatchExit->getSinglePredecessor()) {
+    DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
                    "predecessor.\n");
+    return false;
+  }
+  // FIXME: We bail out of multi-exit unrolling when the epilog loop is
+  // generated and L is an inner loop. This is because in the presence of
+  // multiple exits, the outer loop is incorrect: we do not add the
+  // EpilogPreheader and exit to the outer loop. This is automatically handled
+  // in the prolog case, so we do not have that bug in prolog generation.
+  if (UseEpilogRemainder && L->getParentLoop())
+    return false;
+
+  // All constraints have been satisfied.
+  return true;
 }
+
+
 /// Insert code in the prolog/epilog code when unrolling a loop with a
 /// run-time trip-count.
 ///
@@ -452,18 +515,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool UseEpilogRemainder,
                                       LoopInfo *LI, ScalarEvolution *SE,
                                       DominatorTree *DT, bool PreserveLCSSA) {
-  // for now, only unroll loops that contain a single exit
-  if (!L->getExitingBlock())
-    return false;
+  DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
+  DEBUG(L->dump());
-  // Make sure the loop is in canonical form, and there is a single
-  // exit block only.
-  if (!L->isLoopSimplifyForm())
-    return false;
-  BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
-  if (!Exit)
+  // Make sure the loop is in canonical form.
+  if (!L->isLoopSimplifyForm()) {
+    DEBUG(dbgs() << "Not in simplify form!\n");
     return false;
+  }
+  // Guaranteed by LoopSimplifyForm.
+  BasicBlock *Latch = L->getLoopLatch();
+  BasicBlock *Header = L->getHeader();
+
+  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
+  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
+  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+  // targets of the Latch be an exit block out of the loop. This needs
+  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
+  assert(!L->contains(LatchExit) &&
+         "one of the loop latch successors should be the exit block!");
+  // These are exit blocks other than the target of the latch exiting block.
+  SmallVector<BasicBlock *, 4> OtherExits;
+  bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(
+      L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder);
+  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
+  if (!isMultiExitUnrollingEnabled &&
+      (!L->getExitingBlock() || OtherExits.size())) {
+    DEBUG(
+        dbgs()
+        << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
           "enabled!\n");
+    return false;
+  }
   // Use Scalar Evolution to compute the trip count. This allows more loops to
   // be unrolled than relying on induction var simplification.
   if (!SE)
@@ -471,34 +556,44 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
   // Only unroll loops with a computable trip count, and the trip count needs
   // to be an int value (allowing a pointer type is a TODO item).
-  const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
+  // We calculate the backedge count by using getExitCount on the Latch block,
+  // which is proven to be the only exiting block in this loop. This is the
+  // same as calculating getBackedgeTakenCount on the loop (which computes SCEV
+  // for all exiting blocks).
+  const SCEV *BECountSC = SE->getExitCount(L, Latch);
   if (isa<SCEVCouldNotCompute>(BECountSC) ||
-      !BECountSC->getType()->isIntegerTy())
+      !BECountSC->getType()->isIntegerTy()) {
+    DEBUG(dbgs() << "Could not compute exit block SCEV\n");
     return false;
+  }
   unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
   // Add 1 since the backedge count doesn't include the first loop iteration.
   const SCEV *TripCountSC =
       SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
-  if (isa<SCEVCouldNotCompute>(TripCountSC))
+  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
+    DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
     return false;
+  }
-  BasicBlock *Header = L->getHeader();
   BasicBlock *PreHeader = L->getLoopPreheader();
   BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
   const DataLayout &DL = Header->getModule()->getDataLayout();
   SCEVExpander Expander(*SE, DL, "loop-unroll");
   if (!AllowExpensiveTripCount &&
-      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
+      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
+    DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
     return false;
+  }
   // This constraint lets us deal with an overflowing trip count easily; see the
   // comment on ModVal below.
-  if (Log2_32(Count) > BEWidth)
+  if (Log2_32(Count) > BEWidth) {
+    DEBUG(dbgs()
+          << "Count failed constraint on overflow trip count calculation.\n");
     return false;
-
-  BasicBlock *Latch = L->getLoopLatch();
+  }
   // Loop structure is the following:
   //
@@ -506,7 +601,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
   // Header
   // ...
// Latch - // Exit + // LatchExit BasicBlock *NewPreHeader; BasicBlock *NewExit = nullptr; @@ -519,9 +614,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Split PreHeader to insert a branch around loop for unrolling. NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); NewPreHeader->setName(PreHeader->getName() + ".new"); - // Split Exit to create phi nodes from branch above. - SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); - NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", + // Split LatchExit to create phi nodes from branch above. + SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); + NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Split NewExit to insert epilog remainder loop. EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); @@ -548,7 +643,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Latch Header // *NewExit ... // *EpilogPreHeader Latch - // Exit Exit + // LatchExit LatchExit // Calculate conditions for branch around loop for unrolling // in epilog case and around prolog remainder loop in prolog case. @@ -599,6 +694,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); + if (DT) { + if (UseEpilogRemainder) + DT->changeImmediateDominator(NewExit, PreHeader); + else + DT->changeImmediateDominator(PrologExit, PreHeader); + } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code @@ -620,10 +721,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. - BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; + BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; - CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); + Loop *remainderLoop = CloneLoopBlocks( + L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, + NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), @@ -631,6 +733,66 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, NewBlocks[0]->getIterator(), F->end()); + // Now the loop blocks are cloned and the other exiting blocks from the + // remainder are connected to the original Loop's exit blocks. The remaining + // work is to update the phi nodes in the original loop, and take in the + // values from the cloned region. Also update the dominator info for + // OtherExits and their immediate successors, since we have new edges into + // OtherExits. + SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; + for (auto *BB : OtherExits) { + for (auto &II : *BB) { + + // Given we preserve LCSSA form, we know that the values used outside the + // loop will be used through these phi nodes at the exit blocks that are + // transformed below. 
+ if (!isa<PHINode>(II)) + break; + PHINode *Phi = cast<PHINode>(&II); + unsigned oldNumOperands = Phi->getNumIncomingValues(); + // Add the incoming values from the remainder code to the end of the phi + // node. + for (unsigned i =0; i < oldNumOperands; i++){ + Value *newVal = VMap[Phi->getIncomingValue(i)]; + // newVal can be a constant or derived from values outside the loop, and + // hence need not have a VMap value. + if (!newVal) + newVal = Phi->getIncomingValue(i); + Phi->addIncoming(newVal, + cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); + } + } +#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) + for (BasicBlock *SuccBB : successors(BB)) { + assert(!(any_of(OtherExits, + [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || + SuccBB == LatchExit) && + "Breaks the definition of dedicated exits!"); + } +#endif + // Update the dominator info because the immediate dominator is no longer the + // header of the original Loop. BB has edges both from L and remainder code. + // Since the preheader determines which loop is run (L or directly jump to + // the remainder code), we set the immediate dominator as the preheader. + if (DT) { + DT->changeImmediateDominator(BB, PreHeader); + // Also update the IDom for immediate successors of BB. If the current + // IDom is the header, update the IDom to be the preheader because that is + // the nearest common dominator of all predecessors of SuccBB. We need to + // check for IDom being the header because successors of exit blocks can + // have edges from outside the loop, and we should not incorrectly update + // the IDom in that case. + for (BasicBlock *SuccBB: successors(BB)) + if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { + if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { + assert(!SuccBB->getSinglePredecessor() && + "BB should be the IDom then!"); + DT->changeImmediateDominator(SuccBB, PreHeader); + } + } + } + } + // Loop structure should be the following: // Epilog Prolog // @@ -644,7 +806,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // EpilogHeader Header // ... ... // EpilogLatch Latch - // Exit Exit + // LatchExit LatchExit // Rewrite the cloned instruction operands to use the values created when the // clone is created. @@ -658,7 +820,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (UseEpilogRemainder) { // Connect the epilog code to the original loop and update the // PHI functions. - ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader, + ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, NewPreHeader, VMap, DT, LI, PreserveLCSSA); @@ -684,8 +846,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, } else { // Connect the prolog code to the original loop and update the // PHI functions. - ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, - VMap, DT, LI, PreserveLCSSA); + ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the @@ -693,6 +855,19 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); + // Canonicalize to LoopSimplifyForm both original and remainder loops. We + // cannot rely on the LoopUnrollPass to do this because it only does + // canonicalization for parent/subloops and not the sibling loops. 
+ if (OtherExits.size() > 0) { + // Generate dedicated exit blocks for the original loop, to preserve + // LoopSimplifyForm. + formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); + // Generate dedicated exit blocks for the remainder loop if one exists, to + // preserve LoopSimplifyForm. + if (remainderLoop) + formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); + } + NumRuntimeUnrolled++; return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp index c8efa9e..3c52278 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -12,16 +12,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -29,6 +30,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -87,8 +89,7 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT // with a new integer type of the corresponding bit width. - if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), - m_And(m_APInt(M), m_Instruction(I))))) { + if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { int32_t Bits = (*M + 1).exactLogBase2(); if (Bits > 0) { RT = IntegerType::get(Phi->getContext(), Bits); @@ -230,8 +231,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, // - PHI: // - All uses of the PHI must be the reduction (safe). // - Otherwise, not safe. - // - By one instruction outside of the loop (safe). - // - By further instructions outside of the loop (not safe). + // - By instructions outside of the loop (safe). + // * One value may have several outside users, but all outside + // uses must be of the same value. // - By an instruction that is not part of the reduction (not safe). // This is either: // * An instruction type other than PHI or the reduction operation. @@ -297,10 +299,15 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, // Check if we found the exit user. BasicBlock *Parent = UI->getParent(); if (!TheLoop->contains(Parent)) { - // Exit if you find multiple outside users or if the header phi node is - // being used. In this case the user uses the value of the previous - // iteration, in which case we would loose "VF-1" iterations of the - // reduction operation if we vectorize. + // If we already know this instruction is used externally, move on to + // the next user. + if (ExitInstruction == Cur) + continue; + + // Exit if you find multiple values used outside or if the header phi + // node is being used. 
In this case the user uses the value of the + // previous iteration, in which case we would lose "VF-1" iterations of + // the reduction operation if we vectorize. if (ExitInstruction != nullptr || Cur == Phi) return false; @@ -521,8 +528,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, return false; } -bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, - DominatorTree *DT) { +bool RecurrenceDescriptor::isFirstOrderRecurrence( + PHINode *Phi, Loop *TheLoop, + DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { // Ensure the phi node is in the loop header and has two incoming values. if (Phi->getParent() != TheLoop->getHeader() || @@ -544,16 +552,29 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, // Get the previous value. The previous value comes from the latch edge while // the initial value comes from the preheader edge. auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); - if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous)) + if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || + SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. return false; - // Ensure every user of the phi node is dominated by the previous value. The - // dominance requirement ensures the loop vectorizer will not need to + // Ensure every user of the phi node is dominated by the previous value. + // The dominance requirement ensures the loop vectorizer will not need to // vectorize the initial value prior to the first iteration of the loop. + // TODO: Consider extending this sinking to handle other kinds of instructions + // and expressions, beyond sinking a single cast past Previous. + if (Phi->hasOneUse()) { + auto *I = Phi->user_back(); + if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && + DT->dominates(Previous, I->user_back())) { + SinkAfter[I] = Previous; + return true; + } + } + for (User *U : Phi->users()) - if (auto *I = dyn_cast<Instruction>(U)) + if (auto *I = dyn_cast<Instruction>(U)) { if (!DT->dominates(Previous, I)) return false; + } return true; } @@ -916,6 +937,69 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, return true; } +bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + bool Changed = false; + + // We re-use a vector for the in-loop predecessors. + SmallVector<BasicBlock *, 4> InLoopPredecessors; + + auto RewriteExit = [&](BasicBlock *BB) { + assert(InLoopPredecessors.empty() && + "Must start with an empty predecessors list!"); + auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); + + // See if there are any non-loop predecessors of this exit block and + // keep track of the in-loop predecessors. + bool IsDedicatedExit = true; + for (auto *PredBB : predecessors(BB)) + if (L->contains(PredBB)) { + if (isa<IndirectBrInst>(PredBB->getTerminator())) + // We cannot rewrite exiting edges from an indirectbr. + return false; + + InLoopPredecessors.push_back(PredBB); + } else { + IsDedicatedExit = false; + } + + assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); + + // Nothing to do if this is already a dedicated exit.
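For readers, the "dedicated exit" property this function establishes can be written as a small predicate; a minimal sketch over the public Loop and CFG helpers (llvm/IR/CFG.h, llvm/ADT/STLExtras.h), not code from the patch:

  // An exit block is dedicated when every predecessor lies inside the loop;
  // LoopSimplifyForm requires this of all loop exits.
  static bool isDedicatedExit(const Loop &L, const BasicBlock &Exit) {
    return llvm::all_of(predecessors(&Exit),
                        [&](const BasicBlock *Pred) { return L.contains(Pred); });
  }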
+ if (IsDedicatedExit) + return false; + + auto *NewExitBB = SplitBlockPredecessors( + BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); + + if (!NewExitBB) + DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " + << *L << "\n"); + else + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " + << NewExitBB->getName() << "\n"); + return true; + }; + + // Walk the exit blocks directly rather than building up a data structure for + // them, but only visit each one once. + SmallPtrSet<BasicBlock *, 4> Visited; + for (auto *BB : L->blocks()) + for (auto *SuccBB : successors(BB)) { + // We're looking for exit blocks so skip in-loop successors. + if (L->contains(SuccBB)) + continue; + + // Visit each exit block exactly once. + if (!Visited.insert(SuccBB).second) + continue; + + Changed |= RewriteExit(SuccBB); + } + + return Changed; +} + /// \brief Returns the instructions that use values defined in the loop. SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { SmallVector<Instruction *, 8> UsedOutside; @@ -1105,3 +1189,208 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { else return (FalseVal + (TrueVal / 2)) / TrueVal; } + +/// \brief Adds a 'fast' flag to floating point operations. +static Value *addFastMathFlag(Value *V) { + if (isa<FPMathOperator>(V)) { + FastMathFlags Flags; + Flags.setUnsafeAlgebra(); + cast<Instruction>(V)->setFastMathFlags(Flags); + } + return V; +} + +// Helper to generate a log2 shuffle reduction. +Value * +llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, + RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, + ArrayRef<Value *> RedOps) { + unsigned VF = Src->getType()->getVectorNumElements(); + // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles + // and vector ops, reducing the set of values being computed by half each + // round. + assert(isPowerOf2_32(VF) && + "Reduction emission only supported for pow2 vectors!"); + Value *TmpVec = Src; + SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); + for (unsigned i = VF; i != 1; i >>= 1) { + // Move the upper half of the vector to the lower half. + for (unsigned j = 0; j != i / 2; ++j) + ShuffleMask[j] = Builder.getInt32(i / 2 + j); + + // Fill the rest of the mask with undef. + std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), + UndefValue::get(Builder.getInt32Ty())); + + Value *Shuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), + ConstantVector::get(ShuffleMask), "rdx.shuf"); + + if (Op != Instruction::ICmp && Op != Instruction::FCmp) { + // Floating point operations had to be 'fast' to enable the reduction. + TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, + TmpVec, Shuf, "bin.rdx")); + } else { + assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && + "Invalid min/max"); + TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, + Shuf); + } + if (!RedOps.empty()) + propagateIRFlags(TmpVec, RedOps); + } + // The result is in the first element of the vector. + return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); +} + +/// Create a simple vector reduction specified by an opcode and some +/// flags (if generating min/max reductions). 
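In scalar terms, the shuffle reduction built by getShuffleReduction above computes the following; a sketch for VF = 8 with integer add, where each inner loop corresponds to one "rdx.shuf" shuffle plus one "bin.rdx" vector op:

  int shuffleReduce8(int v[8]) {
    // Each round halves the live width: lanes [0, w) accumulate lanes [w, 2w).
    for (int w = 4; w >= 1; w /= 2)
      for (int j = 0; j < w; ++j)
        v[j] = v[j] + v[j + w];
    return v[0]; // the result ends up in lane 0
  }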
+Value *llvm::createSimpleTargetReduction( + IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, + Value *Src, TargetTransformInfo::ReductionFlags Flags, + ArrayRef<Value *> RedOps) { + assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); + + Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); + std::function<Value*()> BuildFunc; + using RD = RecurrenceDescriptor; + RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; + // TODO: Support creating ordered reductions. + FastMathFlags FMFUnsafe; + FMFUnsafe.setUnsafeAlgebra(); + + switch (Opcode) { + case Instruction::Add: + BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; + break; + case Instruction::Mul: + BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; + break; + case Instruction::And: + BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; + break; + case Instruction::Or: + BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; + break; + case Instruction::Xor: + BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; + break; + case Instruction::FAdd: + BuildFunc = [&]() { + auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); + cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + return Rdx; + }; + break; + case Instruction::FMul: + BuildFunc = [&]() { + auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); + cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + return Rdx; + }; + break; + case Instruction::ICmp: + if (Flags.IsMaxOp) { + MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; + BuildFunc = [&]() { + return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); + }; + } else { + MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; + BuildFunc = [&]() { + return Builder.CreateIntMinReduce(Src, Flags.IsSigned); + }; + } + break; + case Instruction::FCmp: + if (Flags.IsMaxOp) { + MinMaxKind = RD::MRK_FloatMax; + BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; + } else { + MinMaxKind = RD::MRK_FloatMin; + BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; + } + break; + default: + llvm_unreachable("Unhandled opcode"); + break; + } + if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) + return BuildFunc(); + return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); +} + +/// Create a vector reduction using a given recurrence descriptor. +Value *llvm::createTargetReduction(IRBuilder<> &Builder, + const TargetTransformInfo *TTI, + RecurrenceDescriptor &Desc, Value *Src, + bool NoNaN) { + // TODO: Support in-order reductions based on the recurrence descriptor. 
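A hypothetical call site for the helper defined above, reducing a vector of i32 values with integer add. Builder, TTI, and VecVal are assumed to be in scope, and the trailing RedOps argument is assumed to be defaulted; this is not code from the patch:

  // Reduce a <N x i32> vector to a single i32. TTI decides between the
  // target's reduction intrinsic form and the shuffle-based fallback.
  TargetTransformInfo::ReductionFlags Flags; // defaults: not a min/max reduction
  Value *Sum =
      createSimpleTargetReduction(Builder, &TTI, Instruction::Add, VecVal, Flags);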
+ RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); + TargetTransformInfo::ReductionFlags Flags; + Flags.NoNaN = NoNaN; + auto getSimpleRdx = [&](unsigned Opc) { + return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); + }; + switch (RecKind) { + case RecurrenceDescriptor::RK_FloatAdd: + return getSimpleRdx(Instruction::FAdd); + case RecurrenceDescriptor::RK_FloatMult: + return getSimpleRdx(Instruction::FMul); + case RecurrenceDescriptor::RK_IntegerAdd: + return getSimpleRdx(Instruction::Add); + case RecurrenceDescriptor::RK_IntegerMult: + return getSimpleRdx(Instruction::Mul); + case RecurrenceDescriptor::RK_IntegerAnd: + return getSimpleRdx(Instruction::And); + case RecurrenceDescriptor::RK_IntegerOr: + return getSimpleRdx(Instruction::Or); + case RecurrenceDescriptor::RK_IntegerXor: + return getSimpleRdx(Instruction::Xor); + case RecurrenceDescriptor::RK_IntegerMinMax: { + switch (Desc.getMinMaxRecurrenceKind()) { + case RecurrenceDescriptor::MRK_SIntMax: + Flags.IsSigned = true; + Flags.IsMaxOp = true; + break; + case RecurrenceDescriptor::MRK_UIntMax: + Flags.IsMaxOp = true; + break; + case RecurrenceDescriptor::MRK_SIntMin: + Flags.IsSigned = true; + break; + case RecurrenceDescriptor::MRK_UIntMin: + break; + default: + llvm_unreachable("Unhandled MRK"); + } + return getSimpleRdx(Instruction::ICmp); + } + case RecurrenceDescriptor::RK_FloatMinMax: { + Flags.IsMaxOp = + Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; + return getSimpleRdx(Instruction::FCmp); + } + default: + llvm_unreachable("Unhandled RecKind"); + } +} + +void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { + auto *VecOp = dyn_cast<Instruction>(I); + if (!VecOp) + return; + auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) + : dyn_cast<Instruction>(OpValue); + if (!Intersection) + return; + const unsigned Opcode = Intersection->getOpcode(); + VecOp->copyIRFlags(Intersection); + for (auto *V : VL) { + auto *Instr = dyn_cast<Instruction>(V); + if (!Instr) + continue; + if (OpValue == nullptr || Opcode == Instr->getOpcode()) + VecOp->andIRFlags(V); + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp new file mode 100644 index 0000000..900450b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -0,0 +1,510 @@ +//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +static unsigned getLoopOperandSizeInBytes(Type *Type) { + if (VectorType *VTy = dyn_cast<VectorType>(Type)) { + return VTy->getBitWidth() / 8; + } + + return Type->getPrimitiveSizeInBits() / 8; +} + +void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, ConstantInt *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile, + const TargetTransformInfo &TTI) { + // No need to expand zero length copies. 
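Before the body of createMemCpyLoopKnownSize below: its output is equivalent to the following C, sketched for a 4-byte loop operand type and CopyLen = 11. The operand types really come from TTI, so this is illustrative only:

  #include <stdint.h>

  void memcpy11(unsigned char *d, const unsigned char *s) {
    // Main loop: 11 / 4 = 2 wide iterations cover the first 8 bytes.
    uint32_t *d4 = (uint32_t *)d;
    const uint32_t *s4 = (const uint32_t *)s;
    for (uint64_t i = 0; i < 2; ++i)
      d4[i] = s4[i];
    // Residual: the remaining 3 bytes, e.g. one 2-byte op plus one 1-byte op.
    *(uint16_t *)(d + 8) = *(const uint16_t *)(s + 8);
    d[10] = s[10];
  }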
+ if (CopyLen->isZero()) + return; + + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = nullptr; + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *TypeOfCopyLen = CopyLen->getType(); + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; + + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + + if (LoopEndCount != 0) { + // Split + PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); + PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + // Cast the Src and Dst pointers to pointers to the loop operand type (if + // needed). + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); + // Loop Body + Value *SrcGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + // Create the loop branch condition. + Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), + LoopBB, PostLoopBB); + } + + uint64_t BytesCopied = LoopEndCount * LoopOpSize; + uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; + if (RemainingBytes) { + IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() + : InsertBefore); + + // Update the alignment based on the copy size used in the loop body. + SrcAlign = std::min(SrcAlign, LoopOpSize); + DestAlign = std::min(DestAlign, LoopOpSize); + + SmallVector<Type *, 5> RemainingOps; + TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, + SrcAlign, DestAlign); + + for (auto OpTy : RemainingOps) { + // Calculate the new index + unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); + uint64_t GepIndex = BytesCopied / OperandSize; + assert(GepIndex * OperandSize == BytesCopied && + "Division should have no Remainder!"); + // Cast source to operand type and load + PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); + Value *CastedSrc = SrcAddr->getType() == SrcPtrType + ? SrcAddr + : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); + Value *SrcGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); + Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + + // Cast destination to operand type and store.
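A worked instance of the GepIndex arithmetic above: if the main loop has already copied 8 bytes and the next residual operand type is 2 bytes wide, then GepIndex = 8 / 2 = 4, so the residual load/store addresses element 4 of the 2-byte-typed view of the buffer, i.e. exactly byte offset 8, and the assert on the division holds.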
+ PointerType *DstPtrType = PointerType::get(OpTy, DstAS); + Value *CastedDst = DstAddr->getType() == DstPtrType + ? DstAddr + : RBuilder.CreateBitCast(DstAddr, DstPtrType); + Value *DstGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); + RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + BytesCopied += OperandSize; + } + } + assert(BytesCopied == CopyLen->getZExtValue() && + "Bytes copied should match size in the call!"); +} + +void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, + Value *SrcAddr, Value *DstAddr, + Value *CopyLen, unsigned SrcAlign, + unsigned DestAlign, bool SrcIsVolatile, + bool DstIsVolatile, + const TargetTransformInfo &TTI) { + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = + PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); + + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + // Calculate the loop trip count, and remaining bytes to copy after the loop. + Type *CopyLenType = CopyLen->getType(); + IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); + assert(ILengthType && + "expected size argument to memcpy to be an integer type!"); + ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); + Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); + Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); + Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); + + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr); + IRBuilder<> LoopBuilder(LoopBB); + + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); + + Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + Type *Int8Type = Type::getInt8Ty(Ctx); + if (LoopOpType != Int8Type) { + // Loop body for the residual copy. + BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", + PreLoopBB->getParent(), nullptr); + // Residual loop header. + BasicBlock *ResHeaderBB = BasicBlock::Create( + Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); + + // Need to update the pre-loop basic block to branch to the correct place. 
+ // Branch to the main loop if the count is non-zero, branch to the residual + // loop if the copy size is smaller than one iteration of the main loop but + // non-zero, and finally branch past the residual loop if the memcpy + // size is zero. + ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); + PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), + LoopBB, ResHeaderBB); + PreLoopBB->getTerminator()->eraseFromParent(); + + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, + ResHeaderBB); + + // Determine if we need to branch to the residual loop or bypass it. + IRBuilder<> RHBuilder(ResHeaderBB); + RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), + ResLoopBB, PostLoopBB); + + // Copy the residual with single byte load/store loop. + IRBuilder<> ResBuilder(ResLoopBB); + PHINode *ResidualIndex = + ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); + ResidualIndex->addIncoming(Zero, ResHeaderBB); + + Value *SrcAsInt8 = + ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); + Value *DstAsInt8 = + ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); + Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); + Value *SrcGEP = + ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); + Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); + ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *ResNewIndex = + ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); + ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); + + // Create the loop branch condition. + ResBuilder.CreateCondBr( + ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB, + PostLoopBB); + } else { + // In this case the loop operand type was a byte, and there is no need for a + // residual loop to copy the remaining memory after the main loop. + // We do, however, need to patch up the control flow by creating the + // terminators for the preloop block and the memcpy loop. + ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); + PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), + LoopBB, PostLoopBB); + PreLoopBB->getTerminator()->eraseFromParent(); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, + PostLoopBB); + } +} + +void llvm::createMemCpyLoop(Instruction *InsertBefore, + Value *SrcAddr, Value *DstAddr, Value *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile) { + Type *TypeOfCopyLen = CopyLen->getType(); + + BasicBlock *OrigBB = InsertBefore->getParent(); + Function *F = OrigBB->getParent(); + BasicBlock *NewBB = + InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split"); + BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop", + F, NewBB); + + IRBuilder<> Builder(OrigBB->getTerminator()); + + // SrcAddr and DstAddr are expected to be pointer types, + // so no check is made here.
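Stepping back to the unknown-size expansion completed above, its overall shape in C is roughly the following sketch, with OpType standing in for the TTI-chosen loop operand type (illustrative, not from the patch):

  #include <stddef.h>
  #include <stdint.h>

  typedef uint32_t OpType; // assumption: a 4-byte loop operand type

  void memcpyRuntime(unsigned char *d, const unsigned char *s, size_t n) {
    size_t loops = n / sizeof(OpType);  // RuntimeLoopCount (the udiv)
    size_t resid = n % sizeof(OpType);  // RuntimeResidual (the urem)
    size_t copied = n - resid;          // RuntimeBytesCopied
    for (size_t i = 0; i < loops; ++i)  // "loop-memcpy-expansion"
      ((OpType *)d)[i] = ((const OpType *)s)[i];
    for (size_t j = 0; j < resid; ++j)  // "loop-memcpy-residual"
      d[copied + j] = s[copied + j];
  }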
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + + // Cast pointers to (char *) + SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); + DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); + + Builder.CreateCondBr( + Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, + LoopBB); + OrigBB->getTerminator()->eraseFromParent(); + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); + + // load from SrcAddr+LoopIndex + // TODO: we can leverage the align parameter of llvm.memcpy for more efficient + // word-sized loads and stores. + Value *Element = + LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( + LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), + SrcIsVolatile); + // store at DstAddr+LoopIndex + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), + DstAddr, LoopIndex), + DstIsVolatile); + + // The value for LoopIndex coming from backedge is (LoopIndex + 1) + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); +} + +// Lower memmove to IR. memmove is required to correctly copy overlapping memory +// regions; therefore, it has to check the relative positions of the source and +// destination pointers and choose the copy direction accordingly. +// +// The code below is an IR rendition of this C function: +// +// void* memmove(void* dst, const void* src, size_t n) { +// unsigned char* d = dst; +// const unsigned char* s = src; +// if (s < d) { +// // copy backwards +// while (n--) { +// d[n] = s[n]; +// } +// } else { +// // copy forward +// for (size_t i = 0; i < n; ++i) { +// d[i] = s[i]; +// } +// } +// return dst; +// } +static void createMemMoveLoop(Instruction *InsertBefore, + Value *SrcAddr, Value *DstAddr, Value *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = InsertBefore->getParent(); + Function *F = OrigBB->getParent(); + + // Create a comparison of src and dst, based on which we jump to either + // the forward-copy part of the function (if src >= dst) or the backwards-copy + // part (if src < dst). + // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else + // structure. Its block terminators (unconditional branches) are replaced by + // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT, + SrcAddr, DstAddr, "compare_src_dst"); + TerminatorInst *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, + &ElseTerm); + + // Each part of the function consists of two blocks: + // copy_backwards: used to skip the loop when n == 0 + // copy_backwards_loop: the actual backwards loop BB + // copy_forward: used to skip the loop when n == 0 + // copy_forward_loop: the actual forward loop BB + BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); + CopyBackwardsBB->setName("copy_backwards"); + BasicBlock *CopyForwardBB = ElseTerm->getParent(); + CopyForwardBB->setName("copy_forward"); + BasicBlock *ExitBB = InsertBefore->getParent(); + ExitBB->setName("memmove_done"); + + // Initial comparison of n == 0 that lets us skip the loops altogether. Shared + // between both backwards and forward copy clauses. + ICmpInst *CompareN = + new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, + ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); + + // Copying backwards. + BasicBlock *LoopBB = + BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + Value *IndexPtr = LoopBuilder.CreateSub( + LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); + Value *Element = LoopBuilder.CreateLoad( + LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), + ExitBB, LoopBB); + LoopPhi->addIncoming(IndexPtr, LoopBB); + LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); + BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); + ThenTerm->eraseFromParent(); + + // Copying forward. 
+ BasicBlock *FwdLoopBB = + BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB); + IRBuilder<> FwdLoopBuilder(FwdLoopBB); + PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); + Value *FwdElement = FwdLoopBuilder.CreateLoad( + FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); + FwdLoopBuilder.CreateStore( + FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); + Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( + FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); + FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), + ExitBB, FwdLoopBB); + FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); + FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); + + BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); + ElseTerm->eraseFromParent(); +} + +static void createMemSetLoop(Instruction *InsertBefore, + Value *DstAddr, Value *CopyLen, Value *SetValue, + unsigned Align, bool IsVolatile) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = InsertBefore->getParent(); + Function *F = OrigBB->getParent(); + BasicBlock *NewBB = + OrigBB->splitBasicBlock(InsertBefore, "split"); + BasicBlock *LoopBB + = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB); + + IRBuilder<> Builder(OrigBB->getTerminator()); + + // Cast pointer to the type of value getting stored + unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + DstAddr = Builder.CreateBitCast(DstAddr, + PointerType::get(SetValue->getType(), dstAS)); + + Builder.CreateCondBr( + Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, + LoopBB); + OrigBB->getTerminator()->eraseFromParent(); + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); + + LoopBuilder.CreateStore( + SetValue, + LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), + IsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); +} + +void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, + const TargetTransformInfo &TTI) { + // Original implementation + if (!TTI.useWideIRMemcpyLoopLowering()) { + createMemCpyLoop(/* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcAlign */ Memcpy->getAlignment(), + /* DestAlign */ Memcpy->getAlignment(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile()); + } else { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) { + createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ CI, + /* SrcAlign */ Memcpy->getAlignment(), + /* DestAlign */ Memcpy->getAlignment(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), + /* TargetTransformInfo */ TTI); + } else { + createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcAlign */ Memcpy->getAlignment(), + /* DestAlign */ Memcpy->getAlignment(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* 
DstIsVolatile */ Memcpy->isVolatile(), + /* TargetTransformInfo */ TTI); + } + } +} + +void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) { + createMemMoveLoop(/* InsertBefore */ Memmove, + /* SrcAddr */ Memmove->getRawSource(), + /* DstAddr */ Memmove->getRawDest(), + /* CopyLen */ Memmove->getLength(), + /* SrcAlign */ Memmove->getAlignment(), + /* DestAlign */ Memmove->getAlignment(), + /* SrcIsVolatile */ Memmove->isVolatile(), + /* DstIsVolatile */ Memmove->isVolatile()); +} + +void llvm::expandMemSetAsLoop(MemSetInst *Memset) { + createMemSetLoop(/* InsertBefore */ Memset, + /* DstAddr */ Memset->getRawDest(), + /* CopyLen */ Memset->getLength(), + /* SetValue */ Memset->getValue(), + /* Alignment */ Memset->getAlignment(), + Memset->isVolatile()); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 75cd3bc..890afbc 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -24,6 +23,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> @@ -356,10 +356,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { unsigned numCmps = 0; // Start with "simple" cases - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) - Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), - i.getCaseSuccessor())); - + for (auto Case : SI->cases()) + Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(), + Case.getCaseSuccessor())); + std::sort(Cases.begin(), Cases.end(), CaseCmp()); // Merge case into clusters @@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); + // Don't handle unreachable blocks. If there are successors with phis, this + // would leave them behind with missing predecessors. + if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) || + CurBlock->getSinglePredecessor() == CurBlock) { + DeleteList.insert(CurBlock); + return; + } + // If there is only the default destination, just branch. if (!SI->getNumCases()) { BranchInst::Create(Default, CurBlock); diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 24b3b12..b659a2e 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -46,7 +46,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, nullptr, &AC); + PromoteMemToReg(Allocas, DT, &AC); NumPromoted += Allocas.size(); Changed = true; } @@ -59,8 +59,9 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { if (!promoteMemoryToRegister(F, DT, AC)) return PreservedAnalyses::all(); - // FIXME: This should also 'preserve the CFG'.
- return PreservedAnalyses::none(); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; } namespace { diff --git a/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp b/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp deleted file mode 100644 index 1ce4225..0000000 --- a/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp +++ /dev/null @@ -1,2305 +0,0 @@ -//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------===// -// -// This file implements the MemorySSA class. -// -//===----------------------------------------------------------------===// -#include "llvm/Transforms/Utils/MemorySSA.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/PHITransAddr.h" -#include "llvm/IR/AssemblyAnnotationWriter.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Transforms/Scalar.h" -#include <algorithm> - -#define DEBUG_TYPE "memoryssa" -using namespace llvm; -STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups"); -STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits"); -STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts"); - -INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, - true) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, - true) - -INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa", - "Memory SSA Printer", false, false) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa", - "Memory SSA Printer", false, false) - -static cl::opt<unsigned> MaxCheckLimit( - "memssa-check-limit", cl::Hidden, cl::init(100), - cl::desc("The maximum number of stores/phis MemorySSA" - "will consider trying to walk past (default = 100)")); - -static cl::opt<bool> - VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden, - cl::desc("Verify MemorySSA in legacy printer pass.")); - -namespace llvm { -/// \brief An assembly annotator class to print Memory SSA information in -/// comments. 
-class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { - friend class MemorySSA; - const MemorySSA *MSSA; - -public: - MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} - - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) { - if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) - OS << "; " << *MA << "\n"; - } - - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) - OS << "; " << *MA << "\n"; - } -}; -} - -namespace { -/// Our current alias analysis API differentiates heavily between calls and -/// non-calls, and functions called on one usually assert on the other. -/// This class encapsulates the distinction to simplify other code that wants -/// "Memory affecting instructions and related data" to use as a key. -/// For example, this class is used as a densemap key in the use optimizer. -class MemoryLocOrCall { -public: - MemoryLocOrCall() : IsCall(false) {} - MemoryLocOrCall(MemoryUseOrDef *MUD) - : MemoryLocOrCall(MUD->getMemoryInst()) {} - MemoryLocOrCall(const MemoryUseOrDef *MUD) - : MemoryLocOrCall(MUD->getMemoryInst()) {} - - MemoryLocOrCall(Instruction *Inst) { - if (ImmutableCallSite(Inst)) { - IsCall = true; - CS = ImmutableCallSite(Inst); - } else { - IsCall = false; - // There is no such thing as a memorylocation for a fence inst, and it is - // unique in that regard. - if (!isa<FenceInst>(Inst)) - Loc = MemoryLocation::get(Inst); - } - } - - explicit MemoryLocOrCall(const MemoryLocation &Loc) - : IsCall(false), Loc(Loc) {} - - bool IsCall; - ImmutableCallSite getCS() const { - assert(IsCall); - return CS; - } - MemoryLocation getLoc() const { - assert(!IsCall); - return Loc; - } - - bool operator==(const MemoryLocOrCall &Other) const { - if (IsCall != Other.IsCall) - return false; - - if (IsCall) - return CS.getCalledValue() == Other.CS.getCalledValue(); - return Loc == Other.Loc; - } - -private: - union { - ImmutableCallSite CS; - MemoryLocation Loc; - }; -}; -} - -namespace llvm { -template <> struct DenseMapInfo<MemoryLocOrCall> { - static inline MemoryLocOrCall getEmptyKey() { - return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); - } - static inline MemoryLocOrCall getTombstoneKey() { - return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); - } - static unsigned getHashValue(const MemoryLocOrCall &MLOC) { - if (MLOC.IsCall) - return hash_combine(MLOC.IsCall, - DenseMapInfo<const Value *>::getHashValue( - MLOC.getCS().getCalledValue())); - return hash_combine( - MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); - } - static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { - return LHS == RHS; - } -}; - -enum class Reorderability { Always, IfNoAlias, Never }; - -/// This does one-way checks to see if Use could theoretically be hoisted above -/// MayClobber. This will not check the other way around. -/// -/// This assumes that, for the purposes of MemorySSA, Use comes directly after -/// MayClobber, with no potentially clobbering operations in between them. -/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) -static Reorderability getLoadReorderability(const LoadInst *Use, - const LoadInst *MayClobber) { - bool VolatileUse = Use->isVolatile(); - bool VolatileClobber = MayClobber->isVolatile(); - // Volatile operations may never be reordered with other volatile operations. 
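For reference, the volatility checks that follow amount to this decision table (a reader's summary of the adjacent code, with the atomic-ordering rule applied on top):

  Use volatile | MayClobber volatile | Result
  yes          | yes                 | Never
  yes          | no                  | IfNoAlias
  no           | yes                 | IfNoAlias
  no           | no                  | Always

In addition, a seq_cst Use, or a MayClobber at acquire or stronger, forces Never regardless of the row above.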
- if (VolatileUse && VolatileClobber) - return Reorderability::Never; - - // The lang ref allows reordering of volatile and non-volatile operations. - // Whether an aliasing nonvolatile load and volatile load can be reordered, - // though, is ambiguous. Because it may not be best to exploit this ambiguity, - // we only allow volatile/non-volatile reordering if the volatile and - // non-volatile operations don't alias. - Reorderability Result = VolatileUse || VolatileClobber - ? Reorderability::IfNoAlias - : Reorderability::Always; - - // If a load is seq_cst, it cannot be moved above other loads. If its ordering - // is weaker, it can be moved above other loads. We just need to be sure that - // MayClobber isn't an acquire load, because loads can't be moved above - // acquire loads. - // - // Note that this explicitly *does* allow the free reordering of monotonic (or - // weaker) loads of the same address. - bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; - bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), - AtomicOrdering::Acquire); - if (SeqCstUse || MayClobberIsAcquire) - return Reorderability::Never; - return Result; -} - -static bool instructionClobbersQuery(MemoryDef *MD, - const MemoryLocation &UseLoc, - const Instruction *UseInst, - AliasAnalysis &AA) { - Instruction *DefInst = MD->getMemoryInst(); - assert(DefInst && "Defining instruction not actually an instruction"); - - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { - // These intrinsics will show up as affecting memory, but they are just - // markers. - switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::assume: - return false; - default: - break; - } - } - - ImmutableCallSite UseCS(UseInst); - if (UseCS) { - ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); - return I != MRI_NoModRef; - } - - if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) { - if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) { - switch (getLoadReorderability(UseLoad, DefLoad)) { - case Reorderability::Always: - return false; - case Reorderability::Never: - return true; - case Reorderability::IfNoAlias: - return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad)); - } - } - } - - return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod; -} - -static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, - const MemoryLocOrCall &UseMLOC, - AliasAnalysis &AA) { - // FIXME: This is a temporary hack to allow a single instructionClobbersQuery - // to exist while MemoryLocOrCall is pushed through places. - if (UseMLOC.IsCall) - return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), - AA); - return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), - AA); -} - -// Return true when MD may alias MU, return false otherwise. -bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, - AliasAnalysis &AA) { - return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); -} -} - -namespace { -struct UpwardsMemoryQuery { - // True if our original query started off as a call - bool IsCall; - // The pointer location we started the query with. This will be empty if - // IsCall is true. - MemoryLocation StartingLoc; - // This is the instruction we were querying about. 
- const Instruction *Inst; - // The MemoryAccess we actually got called with, used to test local domination - const MemoryAccess *OriginalAccess; - - UpwardsMemoryQuery() - : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {} - - UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access) - : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) { - if (!IsCall) - StartingLoc = MemoryLocation::get(Inst); - } -}; - -static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, - AliasAnalysis &AA) { - Instruction *Inst = MD->getMemoryInst(); - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc); - default: - return false; - } - } - return false; -} - -static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA, - const Instruction *I) { - // If the memory can't be changed, then loads of the memory can't be - // clobbered. - // - // FIXME: We should handle invariant groups, as well. It's a bit harder, - // because we need to pay close attention to invariant group barriers. - return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || - AA.pointsToConstantMemory(I)); -} - -/// Cache for our caching MemorySSA walker. -class WalkerCache { - DenseMap<ConstMemoryAccessPair, MemoryAccess *> Accesses; - DenseMap<const MemoryAccess *, MemoryAccess *> Calls; - -public: - MemoryAccess *lookup(const MemoryAccess *MA, const MemoryLocation &Loc, - bool IsCall) const { - ++NumClobberCacheLookups; - MemoryAccess *R = IsCall ? Calls.lookup(MA) : Accesses.lookup({MA, Loc}); - if (R) - ++NumClobberCacheHits; - return R; - } - - bool insert(const MemoryAccess *MA, MemoryAccess *To, - const MemoryLocation &Loc, bool IsCall) { - // This is fine for Phis, since there are times where we can't optimize - // them. Making a def its own clobber is never correct, though. - assert((MA != To || isa<MemoryPhi>(MA)) && - "Something can't clobber itself!"); - - ++NumClobberCacheInserts; - bool Inserted; - if (IsCall) - Inserted = Calls.insert({MA, To}).second; - else - Inserted = Accesses.insert({{MA, Loc}, To}).second; - - return Inserted; - } - - bool remove(const MemoryAccess *MA, const MemoryLocation &Loc, bool IsCall) { - return IsCall ? Calls.erase(MA) : Accesses.erase({MA, Loc}); - } - - void clear() { - Accesses.clear(); - Calls.clear(); - } - - bool contains(const MemoryAccess *MA) const { - for (auto &P : Accesses) - if (P.first.first == MA || P.second == MA) - return true; - for (auto &P : Calls) - if (P.first == MA || P.second == MA) - return true; - return false; - } -}; - -/// Walks the defining uses of MemoryDefs. Stops after we hit something that has -/// no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when comparing -/// against a null def_chain_iterator, this will compare equal only after -/// walking said Phi/liveOnEntry. -struct def_chain_iterator - : public iterator_facade_base<def_chain_iterator, std::forward_iterator_tag, - MemoryAccess *> { - def_chain_iterator() : MA(nullptr) {} - def_chain_iterator(MemoryAccess *MA) : MA(MA) {} - - MemoryAccess *operator*() const { return MA; } - - def_chain_iterator &operator++() { - // N.B. liveOnEntry has a null defining access. 
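Put differently, a def chain is plain pointer chasing: each step follows getDefiningAccess(), so the sequence runs MA, definingAccess(MA), ..., and a terminating MemoryPhi (or the liveOnEntry def, whose defining access is null) is yielded as the last element before the iterator compares equal to the end sentinel.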
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) - MA = MUD->getDefiningAccess(); - else - MA = nullptr; - return *this; - } - - bool operator==(const def_chain_iterator &O) const { return MA == O.MA; } - -private: - MemoryAccess *MA; -}; - -static iterator_range<def_chain_iterator> -def_chain(MemoryAccess *MA, MemoryAccess *UpTo = nullptr) { -#ifdef EXPENSIVE_CHECKS - assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator()) && - "UpTo isn't in the def chain!"); -#endif - return make_range(def_chain_iterator(MA), def_chain_iterator(UpTo)); -} - -/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing -/// inbetween `Start` and `ClobberAt` can clobbers `Start`. -/// -/// This is meant to be as simple and self-contained as possible. Because it -/// uses no cache, etc., it can be relatively expensive. -/// -/// \param Start The MemoryAccess that we want to walk from. -/// \param ClobberAt A clobber for Start. -/// \param StartLoc The MemoryLocation for Start. -/// \param MSSA The MemorySSA isntance that Start and ClobberAt belong to. -/// \param Query The UpwardsMemoryQuery we used for our search. -/// \param AA The AliasAnalysis we used for our search. -static void LLVM_ATTRIBUTE_UNUSED -checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt, - const MemoryLocation &StartLoc, const MemorySSA &MSSA, - const UpwardsMemoryQuery &Query, AliasAnalysis &AA) { - assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?"); - - if (MSSA.isLiveOnEntryDef(Start)) { - assert(MSSA.isLiveOnEntryDef(ClobberAt) && - "liveOnEntry must clobber itself"); - return; - } - - bool FoundClobber = false; - DenseSet<MemoryAccessPair> VisitedPhis; - SmallVector<MemoryAccessPair, 8> Worklist; - Worklist.emplace_back(Start, StartLoc); - // Walk all paths from Start to ClobberAt, while looking for clobbers. If one - // is found, complain. - while (!Worklist.empty()) { - MemoryAccessPair MAP = Worklist.pop_back_val(); - // All we care about is that nothing from Start to ClobberAt clobbers Start. - // We learn nothing from revisiting nodes. - if (!VisitedPhis.insert(MAP).second) - continue; - - for (MemoryAccess *MA : def_chain(MAP.first)) { - if (MA == ClobberAt) { - if (auto *MD = dyn_cast<MemoryDef>(MA)) { - // instructionClobbersQuery isn't essentially free, so don't use `|=`, - // since it won't let us short-circuit. - // - // Also, note that this can't be hoisted out of the `Worklist` loop, - // since MD may only act as a clobber for 1 of N MemoryLocations. - FoundClobber = - FoundClobber || MSSA.isLiveOnEntryDef(MD) || - instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); - } - break; - } - - // We should never hit liveOnEntry, unless it's the clobber. - assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); - - if (auto *MD = dyn_cast<MemoryDef>(MA)) { - (void)MD; - assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) && - "Found clobber before reaching ClobberAt!"); - continue; - } - - assert(isa<MemoryPhi>(MA)); - Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); - } - } - - // If ClobberAt is a MemoryPhi, we can assume something above it acted as a - // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. - assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && - "ClobberAt never acted as a clobber"); -} - -/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up -/// in one class. 
-class ClobberWalker { - /// Save a few bytes by using unsigned instead of size_t. - using ListIndex = unsigned; - - /// Represents a span of contiguous MemoryDefs, potentially ending in a - /// MemoryPhi. - struct DefPath { - MemoryLocation Loc; - // Note that, because we always walk in reverse, Last will always dominate - // First. Also note that First and Last are inclusive. - MemoryAccess *First; - MemoryAccess *Last; - Optional<ListIndex> Previous; - - DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, - Optional<ListIndex> Previous) - : Loc(Loc), First(First), Last(Last), Previous(Previous) {} - - DefPath(const MemoryLocation &Loc, MemoryAccess *Init, - Optional<ListIndex> Previous) - : DefPath(Loc, Init, Init, Previous) {} - }; - - const MemorySSA &MSSA; - AliasAnalysis &AA; - DominatorTree &DT; - WalkerCache &WC; - UpwardsMemoryQuery *Query; - bool UseCache; - - // Phi optimization bookkeeping - SmallVector<DefPath, 32> Paths; - DenseSet<ConstMemoryAccessPair> VisitedPhis; - DenseMap<const BasicBlock *, MemoryAccess *> WalkTargetCache; - - void setUseCache(bool Use) { UseCache = Use; } - bool shouldIgnoreCache() const { - // UseCache will only be false when we're debugging, or when expensive - // checks are enabled. In either case, we don't care deeply about speed. - return LLVM_UNLIKELY(!UseCache); - } - - void addCacheEntry(const MemoryAccess *What, MemoryAccess *To, - const MemoryLocation &Loc) const { -// EXPENSIVE_CHECKS because most of these queries are redundant. -#ifdef EXPENSIVE_CHECKS - assert(MSSA.dominates(To, What)); -#endif - if (shouldIgnoreCache()) - return; - WC.insert(What, To, Loc, Query->IsCall); - } - - MemoryAccess *lookupCache(const MemoryAccess *MA, const MemoryLocation &Loc) { - return shouldIgnoreCache() ? nullptr : WC.lookup(MA, Loc, Query->IsCall); - } - - void cacheDefPath(const DefPath &DN, MemoryAccess *Target) const { - if (shouldIgnoreCache()) - return; - - for (MemoryAccess *MA : def_chain(DN.First, DN.Last)) - addCacheEntry(MA, Target, DN.Loc); - - // DefPaths only express the path we walked. So, DN.Last could either be a - // thing we want to cache, or not. - if (DN.Last != Target) - addCacheEntry(DN.Last, Target, DN.Loc); - } - - /// Find the nearest def or phi that `From` can legally be optimized to. - /// - /// FIXME: Deduplicate this with MSSA::findDominatingDef. Ideally, MSSA should - /// keep track of this information for us, and allow us O(1) lookups of this - /// info. - MemoryAccess *getWalkTarget(const MemoryPhi *From) { - assert(From->getNumOperands() && "Phi with no operands?"); - - BasicBlock *BB = From->getBlock(); - auto At = WalkTargetCache.find(BB); - if (At != WalkTargetCache.end()) - return At->second; - - SmallVector<const BasicBlock *, 8> ToCache; - ToCache.push_back(BB); - - MemoryAccess *Result = MSSA.getLiveOnEntryDef(); - DomTreeNode *Node = DT.getNode(BB); - while ((Node = Node->getIDom())) { - auto At = WalkTargetCache.find(BB); - if (At != WalkTargetCache.end()) { - Result = At->second; - break; - } - - auto *Accesses = MSSA.getBlockAccesses(Node->getBlock()); - if (Accesses) { - auto Iter = find_if(reverse(*Accesses), [](const MemoryAccess &MA) { - return !isa<MemoryUse>(MA); - }); - if (Iter != Accesses->rend()) { - Result = const_cast<MemoryAccess *>(&*Iter); - break; - } - } - - ToCache.push_back(Node->getBlock()); - } - - for (const BasicBlock *BB : ToCache) - WalkTargetCache.insert({BB, Result}); - return Result; - } - - /// Result of calling walkToPhiOrClobber. 
- struct UpwardsWalkResult { - /// The "Result" of the walk. Either a clobber, the last thing we walked, or - /// both. - MemoryAccess *Result; - bool IsKnownClobber; - bool FromCache; - }; - - /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. - /// This will update Desc.Last as it walks. It will (optionally) also stop at - /// StopAt. - /// - /// This does not test for whether StopAt is a clobber - UpwardsWalkResult walkToPhiOrClobber(DefPath &Desc, - MemoryAccess *StopAt = nullptr) { - assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); - - for (MemoryAccess *Current : def_chain(Desc.Last)) { - Desc.Last = Current; - if (Current == StopAt) - return {Current, false, false}; - - if (auto *MD = dyn_cast<MemoryDef>(Current)) - if (MSSA.isLiveOnEntryDef(MD) || - instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA)) - return {MD, true, false}; - - // Cache checks must be done last, because if Current is a clobber, the - // cache will contain the clobber for Current. - if (MemoryAccess *MA = lookupCache(Current, Desc.Loc)) - return {MA, true, true}; - } - - assert(isa<MemoryPhi>(Desc.Last) && - "Ended at a non-clobber that's not a phi?"); - return {Desc.Last, false, false}; - } - - void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, - ListIndex PriorNode) { - auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), - upward_defs_end()); - for (const MemoryAccessPair &P : UpwardDefs) { - PausedSearches.push_back(Paths.size()); - Paths.emplace_back(P.second, P.first, PriorNode); - } - } - - /// Represents a search that terminated after finding a clobber. This clobber - /// may or may not be present in the path of defs from LastNode..SearchStart, - /// since it may have been retrieved from cache. - struct TerminatedPath { - MemoryAccess *Clobber; - ListIndex LastNode; - }; - - /// Get an access that keeps us from optimizing to the given phi. - /// - /// PausedSearches is an array of indices into the Paths array. Its incoming - /// value is the indices of searches that stopped at the last phi optimization - /// target. It's left in an unspecified state. - /// - /// If this returns None, NewPaused is a vector of searches that terminated - /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. - Optional<TerminatedPath> - getBlockingAccess(MemoryAccess *StopWhere, - SmallVectorImpl<ListIndex> &PausedSearches, - SmallVectorImpl<ListIndex> &NewPaused, - SmallVectorImpl<TerminatedPath> &Terminated) { - assert(!PausedSearches.empty() && "No searches to continue?"); - - // BFS vs DFS really doesn't make a difference here, so just do a DFS with - // PausedSearches as our stack. - while (!PausedSearches.empty()) { - ListIndex PathIndex = PausedSearches.pop_back_val(); - DefPath &Node = Paths[PathIndex]; - - // If we've already visited this path with this MemoryLocation, we don't - // need to do so again. - // - // NOTE: That we just drop these paths on the ground makes caching - // behavior sporadic. e.g. given a diamond: - // A - // B C - // D - // - // ...If we walk D, B, A, C, we'll only cache the result of phi - // optimization for A, B, and D; C will be skipped because it dies here. - // This arguably isn't the worst thing ever, since: - // - We generally query things in a top-down order, so if we got below D - // without needing cache entries for {C, MemLoc}, then chances are - // that those cache entries would end up ultimately unused. 
- // - We still cache things for A, so C only needs to walk up a bit. - // If this behavior becomes problematic, we can fix without a ton of extra - // work. - if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) - continue; - - UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); - if (Res.IsKnownClobber) { - assert(Res.Result != StopWhere || Res.FromCache); - // If this wasn't a cache hit, we hit a clobber when walking. That's a - // failure. - TerminatedPath Term{Res.Result, PathIndex}; - if (!Res.FromCache || !MSSA.dominates(Res.Result, StopWhere)) - return Term; - - // Otherwise, it's a valid thing to potentially optimize to. - Terminated.push_back(Term); - continue; - } - - if (Res.Result == StopWhere) { - // We've hit our target. Save this path off for if we want to continue - // walking. - NewPaused.push_back(PathIndex); - continue; - } - - assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); - addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); - } - - return None; - } - - template <typename T, typename Walker> - struct generic_def_path_iterator - : public iterator_facade_base<generic_def_path_iterator<T, Walker>, - std::forward_iterator_tag, T *> { - generic_def_path_iterator() : W(nullptr), N(None) {} - generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} - - T &operator*() const { return curNode(); } - - generic_def_path_iterator &operator++() { - N = curNode().Previous; - return *this; - } - - bool operator==(const generic_def_path_iterator &O) const { - if (N.hasValue() != O.N.hasValue()) - return false; - return !N.hasValue() || *N == *O.N; - } - - private: - T &curNode() const { return W->Paths[*N]; } - - Walker *W; - Optional<ListIndex> N; - }; - - using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; - using const_def_path_iterator = - generic_def_path_iterator<const DefPath, const ClobberWalker>; - - iterator_range<def_path_iterator> def_path(ListIndex From) { - return make_range(def_path_iterator(this, From), def_path_iterator()); - } - - iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { - return make_range(const_def_path_iterator(this, From), - const_def_path_iterator()); - } - - struct OptznResult { - /// The path that contains our result. - TerminatedPath PrimaryClobber; - /// The paths that we can legally cache back from, but that aren't - /// necessarily the result of the Phi optimization. - SmallVector<TerminatedPath, 4> OtherClobbers; - }; - - ListIndex defPathIndex(const DefPath &N) const { - // The assert looks nicer if we don't need to do &N - const DefPath *NP = &N; - assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && - "Out of bounds DefPath!"); - return NP - &Paths.front(); - } - - /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths - /// that act as legal clobbers. Note that this won't return *all* clobbers. - /// - /// Phi optimization algorithm tl;dr: - /// - Find the earliest def/phi, A, we can optimize to - /// - Find if all paths from the starting memory access ultimately reach A - /// - If not, optimization isn't possible. - /// - Otherwise, walk from A to another clobber or phi, A'. - /// - If A' is a def, we're done. - /// - If A' is a phi, try to optimize it. - /// - /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path - /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. 
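// Before tryOptimizePhi below, its search discipline in isolation: paused
// searches are indices into an append-only Paths array, resumed in DFS order,
// and a (node, location) visited set keeps a path from being walked twice.
// PathEntry, Step, and the integer node ids are simplified stand-ins for the
// MemorySSA types, not the real API.
#include <cstddef>
#include <cstdint>
#include <set>
#include <utility>
#include <vector>

struct PathEntry {
  int Node;     // where this search currently stands
  uint64_t Loc; // the memory location this path is tracking
  int Previous; // index of the parent path, or -1 for the root
};

// Step advances one paused path; when it reaches a phi it appends one new
// path per operand to Paths and queues their indices.
template <typename StepFn>
void runPausedSearches(std::vector<PathEntry> &Paths,
                       std::vector<std::size_t> &PausedSearches, StepFn Step) {
  std::set<std::pair<int, uint64_t>> VisitedPhis;
  while (!PausedSearches.empty()) {
    std::size_t PathIndex = PausedSearches.back();
    PausedSearches.pop_back();
    // If we've already visited this node with this location, skip the path;
    // any result it could produce was already produced by the earlier visit.
    if (!VisitedPhis.insert({Paths[PathIndex].Node, Paths[PathIndex].Loc})
             .second)
      continue;
    Step(PathIndex, PausedSearches);
  }
}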
- OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, - const MemoryLocation &Loc) { - assert(Paths.empty() && VisitedPhis.empty() && - "Reset the optimization state."); - - Paths.emplace_back(Loc, Start, Phi, None); - // Stores how many "valid" optimization nodes we had prior to calling - // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. - auto PriorPathsSize = Paths.size(); - - SmallVector<ListIndex, 16> PausedSearches; - SmallVector<ListIndex, 8> NewPaused; - SmallVector<TerminatedPath, 4> TerminatedPaths; - - addSearches(Phi, PausedSearches, 0); - - // Moves the TerminatedPath with the "most dominated" Clobber to the end of - // Paths. - auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { - assert(!Paths.empty() && "Need a path to move"); - auto Dom = Paths.begin(); - for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) - if (!MSSA.dominates(I->Clobber, Dom->Clobber)) - Dom = I; - auto Last = Paths.end() - 1; - if (Last != Dom) - std::iter_swap(Last, Dom); - }; - - MemoryPhi *Current = Phi; - while (1) { - assert(!MSSA.isLiveOnEntryDef(Current) && - "liveOnEntry wasn't treated as a clobber?"); - - MemoryAccess *Target = getWalkTarget(Current); - // If a TerminatedPath doesn't dominate Target, then it wasn't a legal - // optimization for the prior phi. - assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { - return MSSA.dominates(P.Clobber, Target); - })); - - // FIXME: This is broken, because the Blocker may be reported to be - // liveOnEntry, and we'll happily wait for that to disappear (read: never) - // For the moment, this is fine, since we do nothing with blocker info. - if (Optional<TerminatedPath> Blocker = getBlockingAccess( - Target, PausedSearches, NewPaused, TerminatedPaths)) { - // Cache our work on the blocking node, since we know that's correct. - cacheDefPath(Paths[Blocker->LastNode], Blocker->Clobber); - - // Find the node we started at. We can't search based on N->Last, since - // we may have gone around a loop with a different MemoryLocation. - auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { - return defPathIndex(N) < PriorPathsSize; - }); - assert(Iter != def_path_iterator()); - - DefPath &CurNode = *Iter; - assert(CurNode.Last == Current); - - // Two things: - // A. We can't reliably cache all of NewPaused back. Consider a case - // where we have two paths in NewPaused; one of which can't optimize - // above this phi, whereas the other can. If we cache the second path - // back, we'll end up with suboptimal cache entries. We can handle - // cases like this a bit better when we either try to find all - // clobbers that block phi optimization, or when our cache starts - // supporting unfinished searches. - // B. We can't reliably cache TerminatedPaths back here without doing - // extra checks; consider a case like: - // T - // / \ - // D C - // \ / - // S - // Where T is our target, C is a node with a clobber on it, D is a - // diamond (with a clobber *only* on the left or right node, N), and - // S is our start. Say we walk to D, through the node opposite N - // (read: ignoring the clobber), and see a cache entry in the top - // node of D. That cache entry gets put into TerminatedPaths. We then - // walk up to C (N is later in our worklist), find the clobber, and - // quit. If we append TerminatedPaths to OtherClobbers, we'll cache - // the bottom part of D to the cached clobber, ignoring the clobber - // in N. 
Again, this problem goes away if we start tracking all - // blockers for a given phi optimization. - TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; - return {Result, {}}; - } - - // If there's nothing left to search, then all paths led to valid clobbers - // that we got from our cache; pick the nearest to the start, and allow - // the rest to be cached back. - if (NewPaused.empty()) { - MoveDominatedPathToEnd(TerminatedPaths); - TerminatedPath Result = TerminatedPaths.pop_back_val(); - return {Result, std::move(TerminatedPaths)}; - } - - MemoryAccess *DefChainEnd = nullptr; - SmallVector<TerminatedPath, 4> Clobbers; - for (ListIndex Paused : NewPaused) { - UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); - if (WR.IsKnownClobber) - Clobbers.push_back({WR.Result, Paused}); - else - // Micro-opt: If we hit the end of the chain, save it. - DefChainEnd = WR.Result; - } - - if (!TerminatedPaths.empty()) { - // If we couldn't find the dominating phi/liveOnEntry in the above loop, - // do it now. - if (!DefChainEnd) - for (MemoryAccess *MA : def_chain(Target)) - DefChainEnd = MA; - - // If any of the terminated paths don't dominate the phi we'll try to - // optimize, we need to figure out what they are and quit. - const BasicBlock *ChainBB = DefChainEnd->getBlock(); - for (const TerminatedPath &TP : TerminatedPaths) { - // Because we know that DefChainEnd is as "high" as we can go, we - // don't need local dominance checks; BB dominance is sufficient. - if (DT.dominates(ChainBB, TP.Clobber->getBlock())) - Clobbers.push_back(TP); - } - } - - // If we have clobbers in the def chain, find the one closest to Current - // and quit. - if (!Clobbers.empty()) { - MoveDominatedPathToEnd(Clobbers); - TerminatedPath Result = Clobbers.pop_back_val(); - return {Result, std::move(Clobbers)}; - } - - assert(all_of(NewPaused, - [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); - - // Because liveOnEntry is a clobber, this must be a phi. - auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); - - PriorPathsSize = Paths.size(); - PausedSearches.clear(); - for (ListIndex I : NewPaused) - addSearches(DefChainPhi, PausedSearches, I); - NewPaused.clear(); - - Current = DefChainPhi; - } - } - - /// Caches everything in an OptznResult. - void cacheOptResult(const OptznResult &R) { - if (R.OtherClobbers.empty()) { - // If we're not going to be caching OtherClobbers, don't bother with - // marking visited/etc. - for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode)) - cacheDefPath(N, R.PrimaryClobber.Clobber); - return; - } - - // PrimaryClobber is our answer. If we can cache anything back, we need to - // stop caching when we visit PrimaryClobber. 
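// The caching cutoff used just below, in isolation: mark every node on the
// primary path in a bitvector first, then let each secondary path stop as
// soon as it reaches a marked node, so a shared suffix is only cached once
// and never cached against the wrong clobber. Parent-indexed nodes are a
// simplified stand-in for the DefPath chains.
#include <vector>

inline void markChains(const std::vector<int> &Parent, int PrimaryLeaf,
                       const std::vector<int> &SecondaryLeaves,
                       std::vector<bool> &Visited) {
  // Parent[N] is the next node toward the root, or -1 at the root.
  for (int N = PrimaryLeaf; N != -1; N = Parent[N])
    Visited[N] = true; // "cache N -> primary clobber" would go here
  for (int Leaf : SecondaryLeaves)
    for (int N = Leaf; N != -1 && !Visited[N]; N = Parent[N])
      Visited[N] = true; // "cache N -> this path's clobber" would go here
}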
- SmallBitVector Visited(Paths.size()); - for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode)) { - Visited[defPathIndex(N)] = true; - cacheDefPath(N, R.PrimaryClobber.Clobber); - } - - for (const TerminatedPath &P : R.OtherClobbers) { - for (const DefPath &N : const_def_path(P.LastNode)) { - ListIndex NIndex = defPathIndex(N); - if (Visited[NIndex]) - break; - Visited[NIndex] = true; - cacheDefPath(N, P.Clobber); - } - } - } - - void verifyOptResult(const OptznResult &R) const { - assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { - return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); - })); - } - - void resetPhiOptznState() { - Paths.clear(); - VisitedPhis.clear(); - } - -public: - ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT, - WalkerCache &WC) - : MSSA(MSSA), AA(AA), DT(DT), WC(WC), UseCache(true) {} - - void reset() { WalkTargetCache.clear(); } - - /// Finds the nearest clobber for the given query, optimizing phis if - /// possible. - MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q, - bool UseWalkerCache = true) { - setUseCache(UseWalkerCache); - Query = &Q; - - MemoryAccess *Current = Start; - // This walker pretends uses don't exist. If we're handed one, silently grab - // its def. (This has the nice side-effect of ensuring we never cache uses) - if (auto *MU = dyn_cast<MemoryUse>(Start)) - Current = MU->getDefiningAccess(); - - DefPath FirstDesc(Q.StartingLoc, Current, Current, None); - // Fast path for the overly-common case (no crazy phi optimization - // necessary) - UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); - MemoryAccess *Result; - if (WalkResult.IsKnownClobber) { - cacheDefPath(FirstDesc, WalkResult.Result); - Result = WalkResult.Result; - } else { - OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), - Current, Q.StartingLoc); - verifyOptResult(OptRes); - cacheOptResult(OptRes); - resetPhiOptznState(); - Result = OptRes.PrimaryClobber.Clobber; - } - -#ifdef EXPENSIVE_CHECKS - checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); -#endif - return Result; - } - - void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } -}; - -struct RenamePassData { - DomTreeNode *DTN; - DomTreeNode::const_iterator ChildIt; - MemoryAccess *IncomingVal; - - RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, - MemoryAccess *M) - : DTN(D), ChildIt(It), IncomingVal(M) {} - void swap(RenamePassData &RHS) { - std::swap(DTN, RHS.DTN); - std::swap(ChildIt, RHS.ChildIt); - std::swap(IncomingVal, RHS.IncomingVal); - } -}; -} // anonymous namespace - -namespace llvm { -/// \brief A MemorySSAWalker that does AA walks and caching of lookups to -/// disambiguate accesses. -/// -/// FIXME: The current implementation of this can take quadratic space in rare -/// cases. This can be fixed, but it is something to note until it is fixed. -/// -/// In order to trigger this behavior, you need to store to N distinct locations -/// (that AA can prove don't alias), perform M stores to other memory -/// locations that AA can prove don't alias any of the initial N locations, and -/// then load from all of the N locations. In this case, we insert M cache -/// entries for each of the N loads. 
-/// -/// For example: -/// define i32 @foo() { -/// %a = alloca i32, align 4 -/// %b = alloca i32, align 4 -/// store i32 0, i32* %a, align 4 -/// store i32 0, i32* %b, align 4 -/// -/// ; Insert M stores to other memory that doesn't alias %a or %b here -/// -/// %c = load i32, i32* %a, align 4 ; Caches M entries in -/// ; CachedUpwardsClobberingAccess for the -/// ; MemoryLocation %a -/// %d = load i32, i32* %b, align 4 ; Caches M entries in -/// ; CachedUpwardsClobberingAccess for the -/// ; MemoryLocation %b -/// -/// ; For completeness' sake, loading %a or %b again would not cache *another* -/// ; M entries. -/// %r = add i32 %c, %d -/// ret i32 %r -/// } -class MemorySSA::CachingWalker final : public MemorySSAWalker { - WalkerCache Cache; - ClobberWalker Walker; - bool AutoResetWalker; - - MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); - void verifyRemoved(MemoryAccess *); - -public: - CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); - ~CachingWalker() override; - - using MemorySSAWalker::getClobberingMemoryAccess; - MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; - MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, - const MemoryLocation &) override; - void invalidateInfo(MemoryAccess *) override; - - /// Whether we call resetClobberWalker() after each time we *actually* walk to - /// answer a clobber query. - void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; } - - /// Drop the walker's persistent data structures. At the moment, this means - /// "drop the walker's cache of BasicBlocks -> - /// earliest-MemoryAccess-we-can-optimize-to". This is necessary if we're - /// going to have DT updates, if we remove MemoryAccesses, etc. - void resetClobberWalker() { Walker.reset(); } - - void verify(const MemorySSA *MSSA) override { - MemorySSAWalker::verify(MSSA); - Walker.verify(MSSA); - } -}; - -/// \brief Rename a single basic block into MemorySSA form. -/// Uses the standard SSA renaming algorithm. -/// \returns The new incoming value. -MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, - MemoryAccess *IncomingVal) { - auto It = PerBlockAccesses.find(BB); - // Skip most processing if the list is empty. - if (It != PerBlockAccesses.end()) { - AccessList *Accesses = It->second.get(); - for (MemoryAccess &L : *Accesses) { - if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { - if (MUD->getDefiningAccess() == nullptr) - MUD->setDefiningAccess(IncomingVal); - if (isa<MemoryDef>(&L)) - IncomingVal = &L; - } else { - IncomingVal = &L; - } - } - } - - // Pass through values to our successors - for (const BasicBlock *S : successors(BB)) { - auto It = PerBlockAccesses.find(S); - // Rename the phi nodes in our successor block - if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) - continue; - AccessList *Accesses = It->second.get(); - auto *Phi = cast<MemoryPhi>(&Accesses->front()); - Phi->addIncoming(IncomingVal, BB); - } - - return IncomingVal; -} - -/// \brief This is the standard SSA renaming algorithm. -/// -/// We walk the dominator tree in preorder, renaming accesses, and then filling -/// in phi nodes in our successors. 
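// The renaming walk below is the classic recursive preorder walk converted to
// an explicit stack; here is that conversion on its own, assuming a minimal
// TreeNode in place of DomTreeNode and an opaque State threaded through the
// walk the way IncomingVal is.
#include <cstddef>
#include <vector>

struct TreeNode {
  std::vector<TreeNode *> Children;
};

template <typename State, typename VisitFn>
void preorderWithState(TreeNode *Root, State Incoming, VisitFn Visit) {
  struct Frame {
    TreeNode *Node;
    std::size_t NextChild;
    State IncomingVal; // the state this subtree was entered with
  };
  std::vector<Frame> Stack;
  Stack.push_back({Root, 0, Visit(Root, Incoming)});
  while (!Stack.empty()) {
    Frame &Top = Stack.back();
    if (Top.NextChild == Top.Node->Children.size()) {
      Stack.pop_back(); // subtree done; the parent frame restores its state
      continue;
    }
    TreeNode *Child = Top.Node->Children[Top.NextChild++];
    State ChildState = Visit(Child, Top.IncomingVal);
    Stack.push_back({Child, 0, ChildState}); // Top may dangle after this
  }
}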
-void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, - SmallPtrSet<BasicBlock *, 16> &Visited) { - SmallVector<RenamePassData, 32> WorkStack; - IncomingVal = renameBlock(Root->getBlock(), IncomingVal); - WorkStack.push_back({Root, Root->begin(), IncomingVal}); - Visited.insert(Root->getBlock()); - - while (!WorkStack.empty()) { - DomTreeNode *Node = WorkStack.back().DTN; - DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; - IncomingVal = WorkStack.back().IncomingVal; - - if (ChildIt == Node->end()) { - WorkStack.pop_back(); - } else { - DomTreeNode *Child = *ChildIt; - ++WorkStack.back().ChildIt; - BasicBlock *BB = Child->getBlock(); - Visited.insert(BB); - IncomingVal = renameBlock(BB, IncomingVal); - WorkStack.push_back({Child, Child->begin(), IncomingVal}); - } - } -} - -/// \brief Compute dominator levels, used by the phi insertion algorithm above. -void MemorySSA::computeDomLevels(DenseMap<DomTreeNode *, unsigned> &DomLevels) { - for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode()); - DFI != DFE; ++DFI) - DomLevels[*DFI] = DFI.getPathLength() - 1; -} - -/// \brief This handles unreachable block accesses by deleting phi nodes in -/// unreachable blocks, and marking all other unreachable MemoryAccess's as -/// being uses of the live on entry definition. -void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { - assert(!DT->isReachableFromEntry(BB) && - "Reachable block found while handling unreachable blocks"); - - // Make sure phi nodes in our reachable successors end up with a - // LiveOnEntryDef for our incoming edge, even though our block is forward - // unreachable. We could just disconnect these blocks from the CFG fully, - // but we do not right now. - for (const BasicBlock *S : successors(BB)) { - if (!DT->isReachableFromEntry(S)) - continue; - auto It = PerBlockAccesses.find(S); - // Rename the phi nodes in our successor block - if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) - continue; - AccessList *Accesses = It->second.get(); - auto *Phi = cast<MemoryPhi>(&Accesses->front()); - Phi->addIncoming(LiveOnEntryDef.get(), BB); - } - - auto It = PerBlockAccesses.find(BB); - if (It == PerBlockAccesses.end()) - return; - - auto &Accesses = It->second; - for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { - auto Next = std::next(AI); - // If we have a phi, just remove it. We are going to replace all - // users with live on entry. - if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) - UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); - else - Accesses->erase(AI); - AI = Next; - } -} - -MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) - : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), - NextID(INVALID_MEMORYACCESS_ID) { - buildMemorySSA(); -} - -MemorySSA::~MemorySSA() { - // Drop all our references - for (const auto &Pair : PerBlockAccesses) - for (MemoryAccess &MA : *Pair.second) - MA.dropAllReferences(); -} - -MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { - auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); - - if (Res.second) - Res.first->second = make_unique<AccessList>(); - return Res.first->second.get(); -} - -/// This class is a batch walker of all MemoryUse's in the program, and points -/// their defining access at the thing that actually clobbers them. Because it -/// is a batch walker that touches everything, it does not operate like the -/// other walkers. 
This walker is basically performing a top-down SSA renaming
-/// pass, where the version stack is used as the cache. This enables it to be
-/// significantly more time and memory efficient than using the regular walker,
-/// which is walking bottom-up.
-class MemorySSA::OptimizeUses {
-public:
-  OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
-               DominatorTree *DT)
-      : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
-    Walker = MSSA->getWalker();
-  }
-
-  void optimizeUses();
-
-private:
-  /// This represents where a given MemoryLocation is in the stack.
-  struct MemlocStackInfo {
-    // This essentially is keeping track of versions of the stack. Whenever
-    // the stack changes due to pushes or pops, these versions increase.
-    unsigned long StackEpoch;
-    unsigned long PopEpoch;
-    // This is the lower bound of places on the stack to check. It is equal to
-    // the place the last stack walk ended.
-    // Note: Correctness depends on this being initialized to 0, which DenseMap
-    // does.
-    unsigned long LowerBound;
-    const BasicBlock *LowerBoundBlock;
-    // This is where the last walk for this memory location ended.
-    unsigned long LastKill;
-    bool LastKillValid;
-  };
-  void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
-                           SmallVectorImpl<MemoryAccess *> &,
-                           DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
-  MemorySSA *MSSA;
-  MemorySSAWalker *Walker;
-  AliasAnalysis *AA;
-  DominatorTree *DT;
-};
-
-/// Optimize the uses in a given block. This is basically the SSA renaming
-/// algorithm, with one caveat: We are able to use a single stack for all
-/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
-/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
-/// going to be some position in that stack of possible ones.
-///
-/// We track the stack positions that each MemoryLocation needs to check, and
-/// the place where its last walk ended. This is because we only want to check
-/// the things that changed since last time. The same MemoryLocation should
-/// get clobbered by the same store (getModRefInfo does not use invariantness
-/// or things like this, and if it starts to, we can modify MemoryLocOrCall to
-/// include the relevant data).
-void MemorySSA::OptimizeUses::optimizeUsesInBlock(
-    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
-    SmallVectorImpl<MemoryAccess *> &VersionStack,
-    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {
-
-  // If no accesses, nothing to do.
-  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
-  if (Accesses == nullptr)
-    return;
-
-  // Pop everything that doesn't dominate the current block off the stack,
-  // increment the PopEpoch to account for this.
-  while (!VersionStack.empty()) {
-    BasicBlock *BackBlock = VersionStack.back()->getBlock();
-    if (DT->dominates(BackBlock, BB))
-      break;
-    while (VersionStack.back()->getBlock() == BackBlock)
-      VersionStack.pop_back();
-    ++PopEpoch;
-  }
-  for (MemoryAccess &MA : *Accesses) {
-    auto *MU = dyn_cast<MemoryUse>(&MA);
-    if (!MU) {
-      VersionStack.push_back(&MA);
-      ++StackEpoch;
-      continue;
-    }
-
-    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
-      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
-      continue;
-    }
-
-    MemoryLocOrCall UseMLOC(MU);
-    auto &LocInfo = LocStackInfo[UseMLOC];
-    // If the pop epoch changed, it means we've removed stuff from top of
-    // stack due to changing blocks. We may have to reset the lower bound or
-    // last kill info.
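// The epoch bookkeeping used from here on, reduced to its core: the stack
// version ticks on every push, the pop version on every pop, and each
// location's record compares saved epochs to decide how much of the stack it
// must re-examine. This is a simplified stand-in, and it is more conservative
// than the real code, which only resets when the saved lower-bound block no
// longer dominates the current one.
#include <cstddef>
#include <cstdint>
#include <vector>

struct VersionedStack {
  std::vector<int> Stack;
  std::uint64_t StackEpoch = 1, PopEpoch = 1;
  void push(int V) { Stack.push_back(V); ++StackEpoch; }
  void pop() { Stack.pop_back(); ++PopEpoch; }
};

struct PerLocState {
  std::uint64_t StackEpoch = 0, PopEpoch = 0;
  std::size_t LowerBound = 0; // everything below this was already checked
};

// Returns the first stack index that still needs checking for this location.
inline std::size_t resyncLowerBound(PerLocState &Info,
                                    const VersionedStack &VS) {
  if (Info.PopEpoch != VS.PopEpoch)
    Info.LowerBound = 0; // pops may have invalidated earlier work
  // If only StackEpoch changed, checks below LowerBound are still valid and
  // only the newly pushed entries need a look.
  Info.StackEpoch = VS.StackEpoch;
  Info.PopEpoch = VS.PopEpoch;
  return Info.LowerBound;
}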
-    if (LocInfo.PopEpoch != PopEpoch) {
-      LocInfo.PopEpoch = PopEpoch;
-      LocInfo.StackEpoch = StackEpoch;
-      // If the lower bound was in something that no longer dominates us, we
-      // have to reset it.
-      // We can't simply track stack size, because the stack may have had
-      // pushes/pops in the meantime.
-      // XXX: This is non-optimal, but it is only slower in cases with heavily
-      // branching dominator trees. To get the optimal number of queries, we
-      // would have to make LowerBound and LastKill a per-loc stack, and pop it
-      // until the top of that stack dominates us. This does not seem worth it
-      // ATM.
-      // A much cheaper optimization would be to always explore the deepest
-      // branch of the dominator tree first. This would guarantee this resets
-      // on the smallest set of blocks.
-      if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
-          !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
-        // Reset the lower bound of things to check.
-        // TODO: Some day we should be able to reset to last kill, rather than
-        // 0.
-        LocInfo.LowerBound = 0;
-        LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
-        LocInfo.LastKillValid = false;
-      }
-    } else if (LocInfo.StackEpoch != StackEpoch) {
-      // If all that has changed is the StackEpoch, we only have to check the
-      // new things on the stack, because we've checked everything before. In
-      // this case, the lower bound of things to check remains the same.
-      LocInfo.PopEpoch = PopEpoch;
-      LocInfo.StackEpoch = StackEpoch;
-    }
-    if (!LocInfo.LastKillValid) {
-      LocInfo.LastKill = VersionStack.size() - 1;
-      LocInfo.LastKillValid = true;
-    }
-
-    // At this point, we should have corrected last kill and LowerBound to be
-    // in bounds.
-    assert(LocInfo.LowerBound < VersionStack.size() &&
-           "Lower bound out of range");
-    assert(LocInfo.LastKill < VersionStack.size() &&
-           "Last kill info out of range");
-    // In any case, the new upper bound is the top of the stack.
-    unsigned long UpperBound = VersionStack.size() - 1;
-
-    if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
-      DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
-                   << *(MU->getMemoryInst()) << ")"
-                   << " because there are " << UpperBound - LocInfo.LowerBound
-                   << " stores to disambiguate\n");
-      // Because we did not walk, LastKill is no longer valid, as this may
-      // have been a kill.
-      LocInfo.LastKillValid = false;
-      continue;
-    }
-    bool FoundClobberResult = false;
-    while (UpperBound > LocInfo.LowerBound) {
-      if (isa<MemoryPhi>(VersionStack[UpperBound])) {
-        // For phis, use the walker, see where we ended up, and go there.
-        Instruction *UseInst = MU->getMemoryInst();
-        MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
-        // We are guaranteed to find it or something is wrong.
-        while (VersionStack[UpperBound] != Result) {
-          assert(UpperBound != 0);
-          --UpperBound;
-        }
-        FoundClobberResult = true;
-        break;
-      }
-
-      MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
-      // If the lifetime of the pointer ends at this instruction, it's live on
-      // entry.
-      if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
-        // Reset UpperBound to liveOnEntryDef's place in the stack.
-        UpperBound = 0;
-        FoundClobberResult = true;
-        break;
-      }
-      if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) {
-        FoundClobberResult = true;
-        break;
-      }
-      --UpperBound;
-    }
-    // At the end of this loop, UpperBound is either a clobber or the lower
-    // bound. PHI walking may cause it to be < LowerBound, and in fact,
-    // < LastKill.
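// The decision made just below, on its own: if the scan found a clobber, or
// walked down past the cached kill point, the use is pinned where the scan
// stopped; otherwise all the newly checked entries were harmless and the
// previously cached LastKill still answers the query. Positions are indices
// into the version stack; a simplified stand-in for the real state.
#include <cstddef>

inline std::size_t chooseDefiningSlot(bool FoundClobber, std::size_t StopPos,
                                      std::size_t CachedLastKill) {
  if (FoundClobber || StopPos < CachedLastKill)
    return StopPos;
  return CachedLastKill;
}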
- if (FoundClobberResult || UpperBound < LocInfo.LastKill) { - MU->setDefiningAccess(VersionStack[UpperBound], true); - // We were last killed now by where we got to - LocInfo.LastKill = UpperBound; - } else { - // Otherwise, we checked all the new ones, and now we know we can get to - // LastKill. - MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true); - } - LocInfo.LowerBound = VersionStack.size() - 1; - LocInfo.LowerBoundBlock = BB; - } -} - -/// Optimize uses to point to their actual clobbering definitions. -void MemorySSA::OptimizeUses::optimizeUses() { - - // We perform a non-recursive top-down dominator tree walk - struct StackInfo { - const DomTreeNode *Node; - DomTreeNode::const_iterator Iter; - }; - - SmallVector<MemoryAccess *, 16> VersionStack; - SmallVector<StackInfo, 16> DomTreeWorklist; - DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; - VersionStack.push_back(MSSA->getLiveOnEntryDef()); - - unsigned long StackEpoch = 1; - unsigned long PopEpoch = 1; - for (const auto *DomNode : depth_first(DT->getRootNode())) - optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, - LocStackInfo); -} - -void MemorySSA::placePHINodes( - const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks, - const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) { - // Determine where our MemoryPhi's should go - ForwardIDFCalculator IDFs(*DT); - IDFs.setDefiningBlocks(DefiningBlocks); - SmallVector<BasicBlock *, 32> IDFBlocks; - IDFs.calculate(IDFBlocks); - - std::sort(IDFBlocks.begin(), IDFBlocks.end(), - [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { - return BBNumbers.lookup(A) < BBNumbers.lookup(B); - }); - - // Now place MemoryPhi nodes. - for (auto &BB : IDFBlocks) { - // Insert phi node - AccessList *Accesses = getOrCreateAccessList(BB); - MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); - ValueToMemoryAccess[BB] = Phi; - // Phi's always are placed at the front of the block. - Accesses->push_front(Phi); - } -} - -void MemorySSA::buildMemorySSA() { - // We create an access to represent "live on entry", for things like - // arguments or users of globals, where the memory they use is defined before - // the beginning of the function. We do not actually insert it into the IR. - // We do not define a live on exit for the immediate uses, and thus our - // semantics do *not* imply that something with no immediate uses can simply - // be removed. - BasicBlock &StartingPoint = F.getEntryBlock(); - LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, - &StartingPoint, NextID++); - DenseMap<const BasicBlock *, unsigned int> BBNumbers; - unsigned NextBBNum = 0; - - // We maintain lists of memory accesses per-block, trading memory for time. We - // could just look up the memory access for every possible instruction in the - // stream. - SmallPtrSet<BasicBlock *, 32> DefiningBlocks; - SmallPtrSet<BasicBlock *, 32> DefUseBlocks; - // Go through each block, figure out where defs occur, and chain together all - // the accesses. 
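// The collection pass that follows, sketched standalone: each instruction is
// classified by a mod/ref bitmask (the same test createNewAccess applies
// further down), a block that writes lands in DefiningBlocks, and a block
// that touches memory at all lands in DefUseBlocks. Inst and Block are
// stand-ins for the IR types, not the real API.
#include <set>
#include <vector>

enum ModRefBits : unsigned { NoAccess = 0, Ref = 1, Mod = 2 };

struct Inst { unsigned ModRef; };
struct Block { std::vector<Inst> Insts; };

inline void collectBlocks(std::vector<Block> &Blocks,
                          std::set<Block *> &DefiningBlocks,
                          std::set<Block *> &DefUseBlocks) {
  for (Block &B : Blocks) {
    bool HasDef = false, HasAccess = false;
    for (const Inst &I : B.Insts) {
      if (I.ModRef == NoAccess)
        continue; // no MemoryAccess gets created for these
      HasAccess = true;
      HasDef |= (I.ModRef & Mod) != 0; // writers become MemoryDefs
    }
    if (HasDef)
      DefiningBlocks.insert(&B);
    if (HasAccess)
      DefUseBlocks.insert(&B);
  }
}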
- for (BasicBlock &B : F) { - BBNumbers[&B] = NextBBNum++; - bool InsertIntoDef = false; - AccessList *Accesses = nullptr; - for (Instruction &I : B) { - MemoryUseOrDef *MUD = createNewAccess(&I); - if (!MUD) - continue; - InsertIntoDef |= isa<MemoryDef>(MUD); - - if (!Accesses) - Accesses = getOrCreateAccessList(&B); - Accesses->push_back(MUD); - } - if (InsertIntoDef) - DefiningBlocks.insert(&B); - if (Accesses) - DefUseBlocks.insert(&B); - } - placePHINodes(DefiningBlocks, BBNumbers); - - // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get - // filled in with all blocks. - SmallPtrSet<BasicBlock *, 16> Visited; - renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); - - CachingWalker *Walker = getWalkerImpl(); - - // We're doing a batch of updates; don't drop useful caches between them. - Walker->setAutoResetWalker(false); - OptimizeUses(this, Walker, AA, DT).optimizeUses(); - Walker->setAutoResetWalker(true); - Walker->resetClobberWalker(); - - // Mark the uses in unreachable blocks as live on entry, so that they go - // somewhere. - for (auto &BB : F) - if (!Visited.count(&BB)) - markUnreachableAsLiveOnEntry(&BB); -} - -MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } - -MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { - if (Walker) - return Walker.get(); - - Walker = make_unique<CachingWalker>(this, AA, DT); - return Walker.get(); -} - -MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { - assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB"); - AccessList *Accesses = getOrCreateAccessList(BB); - MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); - ValueToMemoryAccess[BB] = Phi; - // Phi's always are placed at the front of the block. - Accesses->push_front(Phi); - BlockNumberingValid.erase(BB); - return Phi; -} - -MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, - MemoryAccess *Definition) { - assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI"); - MemoryUseOrDef *NewAccess = createNewAccess(I); - assert( - NewAccess != nullptr && - "Tried to create a memory access for a non-memory touching instruction"); - NewAccess->setDefiningAccess(Definition); - return NewAccess; -} - -MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I, - MemoryAccess *Definition, - const BasicBlock *BB, - InsertionPlace Point) { - MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition); - auto *Accesses = getOrCreateAccessList(BB); - if (Point == Beginning) { - // It goes after any phi nodes - auto AI = find_if( - *Accesses, [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); }); - - Accesses->insert(AI, NewAccess); - } else { - Accesses->push_back(NewAccess); - } - BlockNumberingValid.erase(BB); - return NewAccess; -} - -MemoryUseOrDef *MemorySSA::createMemoryAccessBefore(Instruction *I, - MemoryAccess *Definition, - MemoryUseOrDef *InsertPt) { - assert(I->getParent() == InsertPt->getBlock() && - "New and old access must be in the same block"); - MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition); - auto *Accesses = getOrCreateAccessList(InsertPt->getBlock()); - Accesses->insert(AccessList::iterator(InsertPt), NewAccess); - BlockNumberingValid.erase(InsertPt->getBlock()); - return NewAccess; -} - -MemoryUseOrDef *MemorySSA::createMemoryAccessAfter(Instruction *I, - MemoryAccess *Definition, - MemoryAccess *InsertPt) { - assert(I->getParent() == InsertPt->getBlock() && - "New and old access must be in the same block"); - MemoryUseOrDef *NewAccess = 
createDefinedAccess(I, Definition);
-  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
-  Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess);
-  BlockNumberingValid.erase(InsertPt->getBlock());
-  return NewAccess;
-}
-
-void MemorySSA::spliceMemoryAccessAbove(MemoryDef *Where,
-                                        MemoryUseOrDef *What) {
-  assert(What != getLiveOnEntryDef() &&
-         Where != getLiveOnEntryDef() && "Can't splice (above) LOE.");
-  assert(dominates(Where, What) && "Only upwards splices are permitted.");
-
-  if (Where == What)
-    return;
-  if (isa<MemoryDef>(What)) {
-    // TODO: possibly use removeMemoryAccess's more efficient RAUW
-    What->replaceAllUsesWith(What->getDefiningAccess());
-    What->setDefiningAccess(Where->getDefiningAccess());
-    Where->setDefiningAccess(What);
-  }
-  AccessList *Src = getWritableBlockAccesses(What->getBlock());
-  AccessList *Dest = getWritableBlockAccesses(Where->getBlock());
-  Dest->splice(AccessList::iterator(Where), *Src, What);
-
-  BlockNumberingValid.erase(What->getBlock());
-  if (What->getBlock() != Where->getBlock())
-    BlockNumberingValid.erase(Where->getBlock());
-}
-
-/// \brief Helper function to create new memory accesses.
-MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
-  // The assume intrinsic has a control dependency which we model by claiming
-  // that it writes arbitrarily. Ignore that fake memory dependency here.
-  // FIXME: Replace this special casing with a more accurate modelling of
-  // assume's control dependency.
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-    if (II->getIntrinsicID() == Intrinsic::assume)
-      return nullptr;
-
-  // Find out what effect this instruction has on memory.
-  ModRefInfo ModRef = AA->getModRefInfo(I);
-  bool Def = bool(ModRef & MRI_Mod);
-  bool Use = bool(ModRef & MRI_Ref);
-
-  // It's possible for an instruction to not access memory at all. During
-  // construction, we ignore such instructions.
-  if (!Def && !Use)
-    return nullptr;
-
-  assert((Def || Use) &&
-         "Trying to create a memory access with a non-memory instruction");
-
-  MemoryUseOrDef *MUD;
-  if (Def)
-    MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
-  else
-    MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
-  ValueToMemoryAccess[I] = MUD;
-  return MUD;
-}
-
-MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock,
-                                           enum InsertionPlace Where) {
-  // Handle the initial case.
-  if (Where == Beginning)
-    // The only thing that could define us at the beginning is a phi node.
-    if (MemoryPhi *Phi = getMemoryAccess(UseBlock))
-      return Phi;
-
-  DomTreeNode *CurrNode = DT->getNode(UseBlock);
-  // Need to be defined by our dominator.
-  if (Where == Beginning)
-    CurrNode = CurrNode->getIDom();
-  Where = End;
-  while (CurrNode) {
-    auto It = PerBlockAccesses.find(CurrNode->getBlock());
-    if (It != PerBlockAccesses.end()) {
-      auto &Accesses = It->second;
-      for (MemoryAccess &RA : reverse(*Accesses)) {
-        if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA))
-          return &RA;
-      }
-    }
-    CurrNode = CurrNode->getIDom();
-  }
-  return LiveOnEntryDef.get();
-}
-
-/// \brief Returns true if \p Replacer dominates \p Replacee.
-bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
-                             const MemoryAccess *Replacee) const {
-  if (isa<MemoryUseOrDef>(Replacee))
-    return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
-  const auto *MP = cast<MemoryPhi>(Replacee);
-  // For a phi node, the use occurs in the predecessor block of the phi node.
- // Since we may occur multiple times in the phi node, we have to check each - // operand to ensure Replacer dominates each operand where Replacee occurs. - for (const Use &Arg : MP->operands()) { - if (Arg.get() != Replacee && - !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg))) - return false; - } - return true; -} - -/// \brief If all arguments of a MemoryPHI are defined by the same incoming -/// argument, return that argument. -static MemoryAccess *onlySingleValue(MemoryPhi *MP) { - MemoryAccess *MA = nullptr; - - for (auto &Arg : MP->operands()) { - if (!MA) - MA = cast<MemoryAccess>(Arg); - else if (MA != Arg) - return nullptr; - } - return MA; -} - -/// \brief Properly remove \p MA from all of MemorySSA's lookup tables. -/// -/// Because of the way the intrusive list and use lists work, it is important to -/// do removal in the right order. -void MemorySSA::removeFromLookups(MemoryAccess *MA) { - assert(MA->use_empty() && - "Trying to remove memory access that still has uses"); - BlockNumbering.erase(MA); - if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) - MUD->setDefiningAccess(nullptr); - // Invalidate our walker's cache if necessary - if (!isa<MemoryUse>(MA)) - Walker->invalidateInfo(MA); - // The call below to erase will destroy MA, so we can't change the order we - // are doing things here - Value *MemoryInst; - if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) { - MemoryInst = MUD->getMemoryInst(); - } else { - MemoryInst = MA->getBlock(); - } - auto VMA = ValueToMemoryAccess.find(MemoryInst); - if (VMA->second == MA) - ValueToMemoryAccess.erase(VMA); - - auto AccessIt = PerBlockAccesses.find(MA->getBlock()); - std::unique_ptr<AccessList> &Accesses = AccessIt->second; - Accesses->erase(MA); - if (Accesses->empty()) - PerBlockAccesses.erase(AccessIt); -} - -void MemorySSA::removeMemoryAccess(MemoryAccess *MA) { - assert(!isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); - // We can only delete phi nodes if they have no uses, or we can replace all - // uses with a single definition. - MemoryAccess *NewDefTarget = nullptr; - if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) { - // Note that it is sufficient to know that all edges of the phi node have - // the same argument. If they do, by the definition of dominance frontiers - // (which we used to place this phi), that argument must dominate this phi, - // and thus, must dominate the phi's uses, and so we will not hit the assert - // below. - NewDefTarget = onlySingleValue(MP); - assert((NewDefTarget || MP->use_empty()) && - "We can't delete this memory phi"); - } else { - NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess(); - } - - // Re-point the uses at our defining access - if (!MA->use_empty()) { - // Reset optimized on users of this store, and reset the uses. - // A few notes: - // 1. This is a slightly modified version of RAUW to avoid walking the - // uses twice here. - // 2. If we wanted to be complete, we would have to reset the optimized - // flags on users of phi nodes if doing the below makes a phi node have all - // the same arguments. Instead, we prefer users to removeMemoryAccess those - // phi nodes, because doing it here would be N^3. - if (MA->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget); - // Note: We assume MemorySSA is not used in metadata since it's not really - // part of the IR. 
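// The loop that follows is a hand-rolled replaceAllUsesWith; its shape in
// isolation: drain the use list one entry at a time, dropping any cached
// per-user state before redirecting the use. UseRec is a simplified stand-in
// for LLVM's use-list machinery, not the real API.
#include <vector>

struct UseRec {
  int **Slot;          // the pointer this use writes through
  bool *OptimizedFlag; // per-user cached info, dropped on rewrite
};

inline void redirectUses(std::vector<UseRec> &UsesOfOld, int *NewTarget) {
  while (!UsesOfOld.empty()) {
    UseRec U = UsesOfOld.back();
    UsesOfOld.pop_back();
    if (U.OptimizedFlag)
      *U.OptimizedFlag = false; // mirrors MU->resetOptimized()
    *U.Slot = NewTarget;        // mirrors U.set(NewDefTarget)
  }
}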
-
-    while (!MA->use_empty()) {
-      Use &U = *MA->use_begin();
-      if (MemoryUse *MU = dyn_cast<MemoryUse>(U.getUser()))
-        MU->resetOptimized();
-      U.set(NewDefTarget);
-    }
-  }
-
-  // The call below to erase will destroy MA, so we can't change the order we
-  // are doing things here.
-  removeFromLookups(MA);
-}
-
-void MemorySSA::print(raw_ostream &OS) const {
-  MemorySSAAnnotatedWriter Writer(this);
-  F.print(OS, &Writer);
-}
-
-void MemorySSA::dump() const {
-  MemorySSAAnnotatedWriter Writer(this);
-  F.print(dbgs(), &Writer);
-}
-
-void MemorySSA::verifyMemorySSA() const {
-  verifyDefUses(F);
-  verifyDomination(F);
-  verifyOrdering(F);
-  Walker->verify(this);
-}
-
-/// \brief Verify that the order and existence of MemoryAccesses matches the
-/// order and existence of memory-affecting instructions.
-void MemorySSA::verifyOrdering(Function &F) const {
-  // Walk all the blocks, comparing what the lookups think and what the access
-  // lists think, as well as the order in the blocks vs the order in the access
-  // lists.
-  SmallVector<MemoryAccess *, 32> ActualAccesses;
-  for (BasicBlock &B : F) {
-    const AccessList *AL = getBlockAccesses(&B);
-    MemoryAccess *Phi = getMemoryAccess(&B);
-    if (Phi)
-      ActualAccesses.push_back(Phi);
-    for (Instruction &I : B) {
-      MemoryAccess *MA = getMemoryAccess(&I);
-      assert((!MA || AL) && "We have memory-affecting instructions "
-                            "in this block but they are not in the "
-                            "access list");
-      if (MA)
-        ActualAccesses.push_back(MA);
-    }
-    // Either we hit the assert, really have no accesses, or we have both
-    // accesses and an access list.
-    if (!AL)
-      continue;
-    assert(AL->size() == ActualAccesses.size() &&
-           "We don't have the same number of accesses in the block as on the "
-           "access list");
-    auto ALI = AL->begin();
-    auto AAI = ActualAccesses.begin();
-    while (ALI != AL->end() && AAI != ActualAccesses.end()) {
-      assert(&*ALI == *AAI && "Not the same accesses in the same order");
-      ++ALI;
-      ++AAI;
-    }
-    ActualAccesses.clear();
-  }
-}
-
-/// \brief Verify the domination properties of MemorySSA by checking that each
-/// definition dominates all of its uses.
-void MemorySSA::verifyDomination(Function &F) const {
-#ifndef NDEBUG
-  for (BasicBlock &B : F) {
-    // Phi nodes are attached to basic blocks.
-    if (MemoryPhi *MP = getMemoryAccess(&B))
-      for (const Use &U : MP->uses())
-        assert(dominates(MP, U) && "Memory PHI does not dominate its uses");
-
-    for (Instruction &I : B) {
-      MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
-      if (!MD)
-        continue;
-
-      for (const Use &U : MD->uses())
-        assert(dominates(MD, U) && "Memory Def does not dominate its uses");
-    }
-  }
-#endif
-}
-
-/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
-/// appears in the use list of \p Def.
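// The invariant verifyUseInDefs checks below, standalone: every def-use edge
// must be recorded symmetrically, i.e. each operand of a node must list that
// node among its users. DUNode is a simplified stand-in for the MemorySSA
// access types.
#include <algorithm>
#include <cassert>
#include <vector>

struct DUNode {
  std::vector<DUNode *> Operands; // the defs this node uses
  std::vector<DUNode *> Users;    // the nodes using this one
};

inline void verifyDefUseSymmetry(const std::vector<DUNode *> &Graph) {
  for (const DUNode *N : Graph)
    for (const DUNode *Op : N->Operands)
      assert(std::find(Op->Users.begin(), Op->Users.end(), N) !=
                 Op->Users.end() &&
             "did not find use in def's use list");
}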
-
-void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
-#ifndef NDEBUG
-  // The live on entry use may cause us to get a NULL def here.
-  if (!Def)
-    assert(isLiveOnEntryDef(Use) &&
-           "Null def but use does not point to live on entry def");
-  else
-    assert(is_contained(Def->users(), Use) &&
-           "Did not find use in def's use list");
-#endif
-}
-
-/// \brief Verify the immediate use information, by walking all the memory
-/// accesses and verifying that, for each use, it appears in the
-/// appropriate def's use list.
-void MemorySSA::verifyDefUses(Function &F) const {
-  for (BasicBlock &B : F) {
-    // Phi nodes are attached to basic blocks.
-    if (MemoryPhi *Phi = getMemoryAccess(&B)) {
-      assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
-                                          pred_begin(&B), pred_end(&B))) &&
-             "Incomplete MemoryPhi Node");
-      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
-        verifyUseInDefs(Phi->getIncomingValue(I), Phi);
-    }
-
-    for (Instruction &I : B) {
-      if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
-        verifyUseInDefs(MA->getDefiningAccess(), MA);
-      }
-    }
-  }
-}
-
-MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
-  return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
-}
-
-MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
-  return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
-}
-
-/// Perform a local numbering on blocks so that instruction ordering can be
-/// determined in constant time.
-/// TODO: We currently just number in order. If we numbered by N, we could
-/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
-/// log2(N) sequences of mixed before and after) without needing to invalidate
-/// the numbering.
-void MemorySSA::renumberBlock(const BasicBlock *B) const {
-  // The pre-increment ensures the numbers really start at 1.
-  unsigned long CurrentNumber = 0;
-  const AccessList *AL = getBlockAccesses(B);
-  assert(AL != nullptr && "Asking to renumber an empty block");
-  for (const auto &I : *AL)
-    BlockNumbering[&I] = ++CurrentNumber;
-  BlockNumberingValid.insert(B);
-}
-
-/// \brief Determine, for two memory accesses in the same block,
-/// whether \p Dominator dominates \p Dominatee.
-/// \returns True if \p Dominator dominates \p Dominatee.
-bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
-                                 const MemoryAccess *Dominatee) const {
-
-  const BasicBlock *DominatorBlock = Dominator->getBlock();
-
-  assert((DominatorBlock == Dominatee->getBlock()) &&
-         "Asking for local domination when accesses are in different blocks!");
-  // A node dominates itself.
-  if (Dominatee == Dominator)
-    return true;
-
-  // When Dominatee is defined on function entry, it is not dominated by
-  // another memory access.
-  if (isLiveOnEntryDef(Dominatee))
-    return false;
-
-  // When Dominator is defined on function entry, it dominates the other
-  // memory access.
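// The lazy numbering scheme renumberBlock and locallyDominates implement
// around here, standalone: per-block numbers are assigned on the first
// ordering query and discarded whenever the block changes, so queries are
// O(1) amortized. Item and BlockT are stand-ins for the MemorySSA types.
#include <cassert>
#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Item {};
struct BlockT { std::vector<const Item *> Items; };

struct LocalNumbering {
  std::unordered_map<const Item *, unsigned long> Number;
  std::unordered_set<const BlockT *> Valid;

  void renumber(const BlockT *B) {
    unsigned long N = 0; // pre-increment keeps 0 free to mean "not numbered"
    for (const Item *I : B->Items)
      Number[I] = ++N;
    Valid.insert(B);
  }

  // A and C must both live in B; returns true if A comes strictly before C.
  bool locallyDominates(const BlockT *B, const Item *A, const Item *C) {
    if (!Valid.count(B))
      renumber(B); // number lazily, only for blocks that get queried
    unsigned long NA = Number[A], NC = Number[C];
    assert(NA != 0 && NC != 0 && "items were not numbered");
    return NA < NC;
  }
};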
- if (isLiveOnEntryDef(Dominator)) - return true; - - if (!BlockNumberingValid.count(DominatorBlock)) - renumberBlock(DominatorBlock); - - unsigned long DominatorNum = BlockNumbering.lookup(Dominator); - // All numbers start with 1 - assert(DominatorNum != 0 && "Block was not numbered properly"); - unsigned long DominateeNum = BlockNumbering.lookup(Dominatee); - assert(DominateeNum != 0 && "Block was not numbered properly"); - return DominatorNum < DominateeNum; -} - -bool MemorySSA::dominates(const MemoryAccess *Dominator, - const MemoryAccess *Dominatee) const { - if (Dominator == Dominatee) - return true; - - if (isLiveOnEntryDef(Dominatee)) - return false; - - if (Dominator->getBlock() != Dominatee->getBlock()) - return DT->dominates(Dominator->getBlock(), Dominatee->getBlock()); - return locallyDominates(Dominator, Dominatee); -} - -bool MemorySSA::dominates(const MemoryAccess *Dominator, - const Use &Dominatee) const { - if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) { - BasicBlock *UseBB = MP->getIncomingBlock(Dominatee); - // The def must dominate the incoming block of the phi. - if (UseBB != Dominator->getBlock()) - return DT->dominates(Dominator->getBlock(), UseBB); - // If the UseBB and the DefBB are the same, compare locally. - return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee)); - } - // If it's not a PHI node use, the normal dominates can already handle it. - return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser())); -} - -const static char LiveOnEntryStr[] = "liveOnEntry"; - -void MemoryDef::print(raw_ostream &OS) const { - MemoryAccess *UO = getDefiningAccess(); - - OS << getID() << " = MemoryDef("; - if (UO && UO->getID()) - OS << UO->getID(); - else - OS << LiveOnEntryStr; - OS << ')'; -} - -void MemoryPhi::print(raw_ostream &OS) const { - bool First = true; - OS << getID() << " = MemoryPhi("; - for (const auto &Op : operands()) { - BasicBlock *BB = getIncomingBlock(Op); - MemoryAccess *MA = cast<MemoryAccess>(Op); - if (!First) - OS << ','; - else - First = false; - - OS << '{'; - if (BB->hasName()) - OS << BB->getName(); - else - BB->printAsOperand(OS, false); - OS << ','; - if (unsigned ID = MA->getID()) - OS << ID; - else - OS << LiveOnEntryStr; - OS << '}'; - } - OS << ')'; -} - -MemoryAccess::~MemoryAccess() {} - -void MemoryUse::print(raw_ostream &OS) const { - MemoryAccess *UO = getDefiningAccess(); - OS << "MemoryUse("; - if (UO && UO->getID()) - OS << UO->getID(); - else - OS << LiveOnEntryStr; - OS << ')'; -} - -void MemoryAccess::dump() const { - print(dbgs()); - dbgs() << "\n"; -} - -char MemorySSAPrinterLegacyPass::ID = 0; - -MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { - initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); -} - -void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<MemorySSAWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); -} - -bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { - auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); - MSSA.print(dbgs()); - if (VerifyMemorySSA) - MSSA.verifyMemorySSA(); - return false; -} - -AnalysisKey MemorySSAAnalysis::Key; - -MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, - FunctionAnalysisManager &AM) { - auto &DT = AM.getResult<DominatorTreeAnalysis>(F); - auto &AA = AM.getResult<AAManager>(F); - return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT)); -} - -PreservedAnalyses 
MemorySSAPrinterPass::run(Function &F, - FunctionAnalysisManager &AM) { - OS << "MemorySSA for function: " << F.getName() << "\n"; - AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS); - - return PreservedAnalyses::all(); -} - -PreservedAnalyses MemorySSAVerifierPass::run(Function &F, - FunctionAnalysisManager &AM) { - AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); - - return PreservedAnalyses::all(); -} - -char MemorySSAWrapperPass::ID = 0; - -MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { - initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); -} - -void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } - -void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequiredTransitive<DominatorTreeWrapperPass>(); - AU.addRequiredTransitive<AAResultsWrapperPass>(); -} - -bool MemorySSAWrapperPass::runOnFunction(Function &F) { - auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); - MSSA.reset(new MemorySSA(F, &AA, &DT)); - return false; -} - -void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } - -void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { - MSSA->print(OS); -} - -MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} - -MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A, - DominatorTree *D) - : MemorySSAWalker(M), Walker(*M, *A, *D, Cache), AutoResetWalker(true) {} - -MemorySSA::CachingWalker::~CachingWalker() {} - -void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { - // TODO: We can do much better cache invalidation with differently stored - // caches. For now, for MemoryUses, we simply remove them - // from the cache, and kill the entire call/non-call cache for everything - // else. The problem is for phis or defs, currently we'd need to follow use - // chains down and invalidate anything below us in the chain that currently - // terminates at this access. - - // See if this is a MemoryUse, if so, just remove the cached info. MemoryUse - // is by definition never a barrier, so nothing in the cache could point to - // this use. In that case, we only need invalidate the info for the use - // itself. - - if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) { - UpwardsMemoryQuery Q(MU->getMemoryInst(), MU); - Cache.remove(MU, Q.StartingLoc, Q.IsCall); - MU->resetOptimized(); - } else { - // If it is not a use, the best we can do right now is destroy the cache. - Cache.clear(); - } - -#ifdef EXPENSIVE_CHECKS - verifyRemoved(MA); -#endif -} - -/// \brief Walk the use-def chains starting at \p MA and find -/// the MemoryAccess that actually clobbers Loc. 
-///
-/// \returns our clobbering memory access
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
-    MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
-  MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
-#ifdef EXPENSIVE_CHECKS
-  MemoryAccess *NewNoCache =
-      Walker.findClobber(StartingAccess, Q, /*UseWalkerCache=*/false);
-  assert(NewNoCache == New && "Cache made us hand back a different result?");
-#endif
-  if (AutoResetWalker)
-    resetClobberWalker();
-  return New;
-}
-
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
-    MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
-  if (isa<MemoryPhi>(StartingAccess))
-    return StartingAccess;
-
-  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
-  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
-    return StartingUseOrDef;
-
-  Instruction *I = StartingUseOrDef->getMemoryInst();
-
-  // Conservatively, fences are always clobbers, so don't perform the walk if
-  // we hit a fence.
-  if (!ImmutableCallSite(I) && I->isFenceLike())
-    return StartingUseOrDef;
-
-  UpwardsMemoryQuery Q;
-  Q.OriginalAccess = StartingUseOrDef;
-  Q.StartingLoc = Loc;
-  Q.Inst = I;
-  Q.IsCall = false;
-
-  if (auto *CacheResult = Cache.lookup(StartingUseOrDef, Loc, Q.IsCall))
-    return CacheResult;
-
-  // Unlike the other function, do not walk to the def of a def, because we are
-  // handed something we already believe is the clobbering access.
-  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
-                                     ? StartingUseOrDef->getDefiningAccess()
-                                     : StartingUseOrDef;
-
-  MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
-  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
-  DEBUG(dbgs() << *StartingUseOrDef << "\n");
-  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
-  DEBUG(dbgs() << *Clobber << "\n");
-  return Clobber;
-}
-
-MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
-  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
-  // If this is a MemoryPhi, we can't do anything.
-  if (!StartingAccess)
-    return MA;
-
-  // If this is an already optimized use or def, return the optimized result.
-  // Note: Currently, we do not store the optimized def result because we'd
-  // need a separate field, since we can't use it as the defining access.
-  if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
-    if (MU->isOptimized())
-      return MU->getDefiningAccess();
-
-  const Instruction *I = StartingAccess->getMemoryInst();
-  UpwardsMemoryQuery Q(I, StartingAccess);
-  // We can't sanely do anything with fences; they conservatively clobber all
-  // memory, and have no locations to get pointers from to try to
-  // disambiguate.
-  if (!Q.IsCall && I->isFenceLike())
-    return StartingAccess;
-
-  if (auto *CacheResult = Cache.lookup(StartingAccess, Q.StartingLoc, Q.IsCall))
-    return CacheResult;
-
-  if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
-    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
-    Cache.insert(StartingAccess, LiveOnEntry, Q.StartingLoc, Q.IsCall);
-    if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
-      MU->setDefiningAccess(LiveOnEntry, true);
-    return LiveOnEntry;
-  }
-
-  // Start with the thing we already think clobbers this location.
-  MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
-
-  // At this point, DefiningAccess may be the live on entry def.
-  // If it is, we will not get a better result.
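// The query path around here, reduced to shape: answer from the cache when
// possible, short-circuit when the defining access is already the entry def
// (no walk can improve on it), otherwise walk and memoize. Acc and Walk are
// simplified stand-ins; the real code also distinguishes calls and locations
// in its cache key.
#include <unordered_map>

struct Acc {
  bool IsLiveOnEntry = false;
  Acc *Def = nullptr; // null for phis and the entry def in this sketch
};

template <typename WalkFn>
Acc *clobberQuery(Acc *Start, std::unordered_map<Acc *, Acc *> &Cache,
                  WalkFn Walk) {
  auto It = Cache.find(Start);
  if (It != Cache.end())
    return It->second;
  // For a use, start from what we already believe clobbers it.
  Acc *Defining = Start->Def ? Start->Def : Start;
  if (Defining->IsLiveOnEntry)
    return Defining; // already the best possible answer
  Acc *Result = Walk(Defining);
  Cache[Start] = Result; // memoize for the next query
  return Result;
}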
- if (MSSA->isLiveOnEntryDef(DefiningAccess)) - return DefiningAccess; - - MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); - DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); - DEBUG(dbgs() << *DefiningAccess << "\n"); - DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); - DEBUG(dbgs() << *Result << "\n"); - if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess)) - MU->setDefiningAccess(Result, true); - - return Result; -} - -// Verify that MA doesn't exist in any of the caches. -void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) { - assert(!Cache.contains(MA) && "Found removed MemoryAccess in cache."); -} - -MemoryAccess * -DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { - if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) - return Use->getDefiningAccess(); - return MA; -} - -MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( - MemoryAccess *StartingAccess, const MemoryLocation &) { - if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) - return Use->getDefiningAccess(); - return StartingAccess; -} -} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index c999bd0..9f2ad54 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -13,15 +13,16 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { @@ -67,6 +68,7 @@ namespace { } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.setPreservesAll(); } @@ -110,9 +112,15 @@ namespace { } // Rename all functions + const TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); for (auto &F : M) { StringRef Name = F.getName(); - if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + LibFunc Tmp; + // Leave library functions alone because their presence or absence could + // affect the behavior of other passes. + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || + TLI.getLibFunc(F, Tmp)) continue; F.setName(renamer.newName()); @@ -139,8 +147,11 @@ namespace { } char MetaRenamer::ID = 0; -INITIALIZE_PASS(MetaRenamer, "metarenamer", - "Assign new names to everything", false, false) +INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) //===----------------------------------------------------------------------===// // // MetaRenamer - Rename everything with metasyntactic names. 
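// The filter the MetaRenamer change above adds, sketched with plain strings:
// intrinsics (an "llvm." prefix), names beginning with the \1 mangling-escape
// byte, and anything the library-function oracle recognizes keep their names,
// because other passes key off them. isLibFunc stands in for
// TargetLibraryInfo::getLibFunc; it is an assumption of this sketch, not the
// real API.
#include <functional>
#include <string>
#include <vector>

inline std::vector<std::string>
namesToRename(const std::vector<std::string> &Functions,
              const std::function<bool(const std::string &)> &isLibFunc) {
  std::vector<std::string> Result;
  for (const std::string &Name : Functions) {
    if (Name.rfind("llvm.", 0) == 0)      // intrinsics are never renamed
      continue;
    if (!Name.empty() && Name[0] == '\1') // \1 suppresses name mangling
      continue;
    if (isLibFunc(Name))                  // recognized C library function
      continue;
    Result.push_back(Name); // safe to give this one a metasyntactic name
  }
  return Result;
}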
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 0d623df..2ef3d63 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -35,7 +35,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, // Upgrade a 2-field global array type to the new 3-field format if needed. if (Data && OldEltTy->getNumElements() < 3) EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - IRB.getInt8PtrTy(), nullptr); + IRB.getInt8PtrTy()); else EltTy = OldEltTy; if (Constant *Init = GVCtor->getInitializer()) { @@ -44,10 +44,10 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, for (unsigned i = 0; i != n; ++i) { auto Ctor = cast<Constant>(Init->getOperand(i)); if (EltTy != OldEltTy) - Ctor = ConstantStruct::get( - EltTy, Ctor->getAggregateElement((unsigned)0), - Ctor->getAggregateElement(1), - Constant::getNullValue(IRB.getInt8PtrTy()), nullptr); + Ctor = + ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0), + Ctor->getAggregateElement(1), + Constant::getNullValue(IRB.getInt8PtrTy())); CurrentCtors.push_back(Ctor); } } @@ -55,7 +55,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, } else { // Use the new three-field struct if there isn't one already. EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - IRB.getInt8PtrTy(), nullptr); + IRB.getInt8PtrTy()); } // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. @@ -130,13 +130,25 @@ void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); - FuncOrBitcast->dump(); + FuncOrBitcast->print(errs()); + errs() << '\n'; std::string Err; raw_string_ostream Stream(Err); Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast; report_fatal_error(Err); } +Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, + ArrayRef<Type *> InitArgTypes) { + assert(!InitName.empty() && "Expected init function name"); + Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction( + InitName, + FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false), + AttributeList())); + F->setLinkage(Function::ExternalLinkage); + return F; +} + std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, @@ -144,22 +156,19 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions( assert(!InitName.empty() && "Expected init function name"); assert(InitArgs.size() == InitArgTypes.size() && "Sanitizer's init function expects different number of arguments"); + Function *InitFunction = + declareSanitizerInitFunction(M, InitName, InitArgTypes); Function *Ctor = Function::Create( FunctionType::get(Type::getVoidTy(M.getContext()), false), GlobalValue::InternalLinkage, CtorName, &M); BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB)); - Function *InitFunction = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - InitName, FunctionType::get(IRB.getVoidTy(), InitArgTypes, false), - AttributeSet())); - InitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(InitFunction, 
               InitArgs);
   if (!VersionCheckName.empty()) {
     Function *VersionCheckFunction =
         checkSanitizerInterfaceFunction(M.getOrInsertFunction(
             VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
-            AttributeSet()));
+            AttributeList()));
     IRB.CreateCall(VersionCheckFunction, {});
   }
   return std::make_pair(Ctor, InitFunction);
@@ -228,3 +237,35 @@ void llvm::filterDeadComdatFunctions(
            ComdatEntriesCovered.end();
   });
 }
+
+std::string llvm::getUniqueModuleId(Module *M) {
+  MD5 Md5;
+  bool ExportsSymbols = false;
+  auto AddGlobal = [&](GlobalValue &GV) {
+    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+        !GV.hasExternalLinkage())
+      return;
+    ExportsSymbols = true;
+    Md5.update(GV.getName());
+    Md5.update(ArrayRef<uint8_t>{0});
+  };
+
+  for (auto &F : *M)
+    AddGlobal(F);
+  for (auto &GV : M->globals())
+    AddGlobal(GV);
+  for (auto &GA : M->aliases())
+    AddGlobal(GA);
+  for (auto &IF : M->ifuncs())
+    AddGlobal(IF);
+
+  if (!ExportsSymbols)
+    return "";
+
+  MD5::MD5Result R;
+  Md5.final(R);
+
+  SmallString<32> Str;
+  MD5::stringifyResult(R, Str);
+  return ("$" + Str).str();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
new file mode 100644
index 0000000..dc78054
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/OrderedInstructions.cpp
@@ -0,0 +1,32 @@
+//===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a utility to check the dominance relation of two instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
+using namespace llvm;
+
+/// Given two instructions, use OrderedBasicBlock to check for the dominance
+/// relation if the instructions are in the same basic block; otherwise, use
+/// the dominator tree.
+bool OrderedInstructions::dominates(const Instruction *InstA,
+                                    const Instruction *InstB) const {
+  const BasicBlock *IBB = InstA->getParent();
+  // Use the ordered basic block to do the dominance check in case the two
+  // instructions are in the same basic block.
+  if (IBB == InstB->getParent()) {
+    auto OBB = OBBMap.find(IBB);
+    if (OBB == OBBMap.end())
+      OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
+    return OBB->second->dominates(InstA, InstB);
+  }
+  return DT->dominates(InstA->getParent(), InstB->getParent());
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
new file mode 100644
index 0000000..d4cdaed
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -0,0 +1,793 @@
+//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the PredicateInfo class.
+// +//===----------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/PredicateInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/OrderedInstructions.h" +#include <algorithm> +#define DEBUG_TYPE "predicateinfo" +using namespace llvm; +using namespace PatternMatch; +using namespace llvm::PredicateInfoClasses; + +INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo", + "PredicateInfo Printer", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo", + "PredicateInfo Printer", false, false) +static cl::opt<bool> VerifyPredicateInfo( + "verify-predicateinfo", cl::init(false), cl::Hidden, + cl::desc("Verify PredicateInfo in legacy printer pass.")); +namespace { +DEBUG_COUNTER(RenameCounter, "predicateinfo-rename", + "Controls which variables are renamed with predicateinfo") +// Given a predicate info that is a type of branching terminator, get the +// branching block. +const BasicBlock *getBranchBlock(const PredicateBase *PB) { + assert(isa<PredicateWithEdge>(PB) && + "Only branches and switches should have PHIOnly defs that " + "require branch blocks."); + return cast<PredicateWithEdge>(PB)->From; +} + +// Given a predicate info that is a type of branching terminator, get the +// branching terminator. +static Instruction *getBranchTerminator(const PredicateBase *PB) { + assert(isa<PredicateWithEdge>(PB) && + "Not a predicate info type we know how to get a terminator from."); + return cast<PredicateWithEdge>(PB)->From->getTerminator(); +} + +// Given a predicate info that is a type of branching terminator, get the +// edge this predicate info represents +const std::pair<BasicBlock *, BasicBlock *> +getBlockEdge(const PredicateBase *PB) { + assert(isa<PredicateWithEdge>(PB) && + "Not a predicate info type we know how to get an edge from."); + const auto *PEdge = cast<PredicateWithEdge>(PB); + return std::make_pair(PEdge->From, PEdge->To); +} +} + +namespace llvm { +namespace PredicateInfoClasses { +enum LocalNum { + // Operations that must appear first in the block. + LN_First, + // Operations that are somewhere in the middle of the block, and are sorted on + // demand. + LN_Middle, + // Operations that must appear last in a block, like successor phi node uses. + LN_Last +}; + +// Associate global and local DFS info with defs and uses, so we can sort them +// into a global domination ordering. +struct ValueDFS { + int DFSIn = 0; + int DFSOut = 0; + unsigned int LocalNum = LN_Middle; + // Only one of Def or Use will be set. 
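+  // A set Def means this entry can define a new name on the renaming stack;
+  // a set U means this entry is an operand that may be rewritten to the
+  // nearest dominating def (see renameUses below).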
+  Value *Def = nullptr;
+  Use *U = nullptr;
+  // Neither PInfo nor EdgeOnly participate in the ordering
+  PredicateBase *PInfo = nullptr;
+  bool EdgeOnly = false;
+};
+
+// Perform a strict weak ordering on instructions and arguments.
+static bool valueComesBefore(OrderedInstructions &OI, const Value *A,
+                             const Value *B) {
+  auto *ArgA = dyn_cast_or_null<Argument>(A);
+  auto *ArgB = dyn_cast_or_null<Argument>(B);
+  if (ArgA && !ArgB)
+    return true;
+  if (ArgB && !ArgA)
+    return false;
+  if (ArgA && ArgB)
+    return ArgA->getArgNo() < ArgB->getArgNo();
+  return OI.dominates(cast<Instruction>(A), cast<Instruction>(B));
+}
+
+// This compares ValueDFS structures, creating OrderedBasicBlocks where
+// necessary to compare uses/defs in the same block. Doing so allows us to walk
+// the minimum number of instructions necessary to compute our def/use ordering.
+struct ValueDFS_Compare {
+  OrderedInstructions &OI;
+  ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {}
+
+  bool operator()(const ValueDFS &A, const ValueDFS &B) const {
+    if (&A == &B)
+      return false;
+    // The only case we can't directly compare them is when they are in the
+    // same block, and both have localnum == middle. In that case, we have to
+    // use comesbefore to see what the real ordering is, because they are in
+    // the same basic block.
+
+    bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
+
+    // We want to put the def that will get used for a given set of phi uses,
+    // before those phi uses.
+    // So we sort by edge, then by def.
+    // Note that only phi node uses and defs can come last.
+    if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
+      return comparePHIRelated(A, B);
+
+    if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
+      return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) <
+             std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U);
+    return localComesBefore(A, B);
+  }
+
+  // For a phi use, or a non-materialized def, return the edge it represents.
+  const std::pair<BasicBlock *, BasicBlock *>
+  getBlockEdge(const ValueDFS &VD) const {
+    if (!VD.Def && VD.U) {
+      auto *PHI = cast<PHINode>(VD.U->getUser());
+      return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent());
+    }
+    // This is really a non-materialized def.
+    return ::getBlockEdge(VD.PInfo);
+  }
+
+  // For two phi related values, return the ordering.
+  bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
+    auto &ABlockEdge = getBlockEdge(A);
+    auto &BBlockEdge = getBlockEdge(B);
+    // Now sort by block edge and then defs before uses.
+    return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U);
+  }
+
+  // Get the definition of an instruction that occurs in the middle of a block.
+  Value *getMiddleDef(const ValueDFS &VD) const {
+    if (VD.Def)
+      return VD.Def;
+    // It's possible for the defs and uses to be null. For branches, the local
+    // numbering will say the placed predicateinfos should go first (IE
+    // LN_beginning), so we won't be in this function. For assumes, we will end
+    // up here, because we need to order the def we will place relative to the
+    // assume. So for the purpose of ordering, we pretend the def is the assume
+    // because that is where we will insert the info.
+ if (!VD.U) { + assert(VD.PInfo && + "No def, no use, and no predicateinfo should not occur"); + assert(isa<PredicateAssume>(VD.PInfo) && + "Middle of block should only occur for assumes"); + return cast<PredicateAssume>(VD.PInfo)->AssumeInst; + } + return nullptr; + } + + // Return either the Def, if it's not null, or the user of the Use, if the def + // is null. + const Instruction *getDefOrUser(const Value *Def, const Use *U) const { + if (Def) + return cast<Instruction>(Def); + return cast<Instruction>(U->getUser()); + } + + // This performs the necessary local basic block ordering checks to tell + // whether A comes before B, where both are in the same basic block. + bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const { + auto *ADef = getMiddleDef(A); + auto *BDef = getMiddleDef(B); + + // See if we have real values or uses. If we have real values, we are + // guaranteed they are instructions or arguments. No matter what, we are + // guaranteed they are in the same block if they are instructions. + auto *ArgA = dyn_cast_or_null<Argument>(ADef); + auto *ArgB = dyn_cast_or_null<Argument>(BDef); + + if (ArgA || ArgB) + return valueComesBefore(OI, ArgA, ArgB); + + auto *AInst = getDefOrUser(ADef, A.U); + auto *BInst = getDefOrUser(BDef, B.U); + return valueComesBefore(OI, AInst, BInst); + } +}; + +} // namespace PredicateInfoClasses + +bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack, + const ValueDFS &VDUse) const { + if (Stack.empty()) + return false; + // If it's a phi only use, make sure it's for this phi node edge, and that the + // use is in a phi node. If it's anything else, and the top of the stack is + // EdgeOnly, we need to pop the stack. We deliberately sort phi uses next to + // the defs they must go with so that we can know it's time to pop the stack + // when we hit the end of the phi uses for a given def. + if (Stack.back().EdgeOnly) { + if (!VDUse.U) + return false; + auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser()); + if (!PHI) + return false; + // Check edge + BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U); + if (EdgePred != getBranchBlock(Stack.back().PInfo)) + return false; + + // Use dominates, which knows how to handle edge dominance. + return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U); + } + + return (VDUse.DFSIn >= Stack.back().DFSIn && + VDUse.DFSOut <= Stack.back().DFSOut); +} + +void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack, + const ValueDFS &VD) { + while (!Stack.empty() && !stackIsInScope(Stack, VD)) + Stack.pop_back(); +} + +// Convert the uses of Op into a vector of uses, associating global and local +// DFS info with each one. +void PredicateInfo::convertUsesToDFSOrdered( + Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) { + for (auto &U : Op->uses()) { + if (auto *I = dyn_cast<Instruction>(U.getUser())) { + ValueDFS VD; + // Put the phi node uses in the incoming block. + BasicBlock *IBlock; + if (auto *PN = dyn_cast<PHINode>(I)) { + IBlock = PN->getIncomingBlock(U); + // Make phi node users appear last in the incoming block + // they are from. + VD.LocalNum = LN_Last; + } else { + // If it's not a phi node use, it is somewhere in the middle of the + // block. + IBlock = I->getParent(); + VD.LocalNum = LN_Middle; + } + DomTreeNode *DomNode = DT.getNode(IBlock); + // It's possible our use is in an unreachable block. Skip it if so. 
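+      // (DT.getNode() returns null for blocks that are not reachable, since
+      // only reachable blocks are numbered in the dominator tree.)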
+ if (!DomNode) + continue; + VD.DFSIn = DomNode->getDFSNumIn(); + VD.DFSOut = DomNode->getDFSNumOut(); + VD.U = &U; + DFSOrderedSet.push_back(VD); + } + } +} + +// Collect relevant operations from Comparison that we may want to insert copies +// for. +void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) { + auto *Op0 = Comparison->getOperand(0); + auto *Op1 = Comparison->getOperand(1); + if (Op0 == Op1) + return; + CmpOperands.push_back(Comparison); + // Only want real values, not constants. Additionally, operands with one use + // are only being used in the comparison, which means they will not be useful + // for us to consider for predicateinfo. + // + if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse()) + CmpOperands.push_back(Op0); + if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse()) + CmpOperands.push_back(Op1); +} + +// Add Op, PB to the list of value infos for Op, and mark Op to be renamed. +void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op, + PredicateBase *PB) { + OpsToRename.insert(Op); + auto &OperandInfo = getOrCreateValueInfo(Op); + AllInfos.push_back(PB); + OperandInfo.Infos.push_back(PB); +} + +// Process an assume instruction and place relevant operations we want to rename +// into OpsToRename. +void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, + SmallPtrSetImpl<Value *> &OpsToRename) { + // See if we have a comparison we support + SmallVector<Value *, 8> CmpOperands; + SmallVector<Value *, 2> ConditionsToProcess; + CmpInst::Predicate Pred; + Value *Operand = II->getOperand(0); + if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()), + m_Cmp(Pred, m_Value(), m_Value())) + .match(II->getOperand(0))) { + ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0)); + ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1)); + ConditionsToProcess.push_back(Operand); + } else if (isa<CmpInst>(Operand)) { + + ConditionsToProcess.push_back(Operand); + } + for (auto Cond : ConditionsToProcess) { + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) { + collectCmpOps(Cmp, CmpOperands); + // Now add our copy infos for our operands + for (auto *Op : CmpOperands) { + auto *PA = new PredicateAssume(Op, II, Cmp); + addInfoFor(OpsToRename, Op, PA); + } + CmpOperands.clear(); + } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) { + // Otherwise, it should be an AND. + assert(BinOp->getOpcode() == Instruction::And && + "Should have been an AND"); + auto *PA = new PredicateAssume(BinOp, II, BinOp); + addInfoFor(OpsToRename, BinOp, PA); + } else { + llvm_unreachable("Unknown type of condition"); + } + } +} + +// Process a block terminating branch, and place relevant operations to be +// renamed into OpsToRename. +void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, + SmallPtrSetImpl<Value *> &OpsToRename) { + BasicBlock *FirstBB = BI->getSuccessor(0); + BasicBlock *SecondBB = BI->getSuccessor(1); + SmallVector<BasicBlock *, 2> SuccsToProcess; + SuccsToProcess.push_back(FirstBB); + SuccsToProcess.push_back(SecondBB); + SmallVector<Value *, 2> ConditionsToProcess; + + auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) { + for (auto *Succ : SuccsToProcess) { + // Don't try to insert on a self-edge. This is mainly because we will + // eliminate during renaming anyway. 
+ if (Succ == BranchBB) + continue; + bool TakenEdge = (Succ == FirstBB); + // For and, only insert on the true edge + // For or, only insert on the false edge + if ((isAnd && !TakenEdge) || (isOr && TakenEdge)) + continue; + PredicateBase *PB = + new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge); + addInfoFor(OpsToRename, Op, PB); + if (!Succ->getSinglePredecessor()) + EdgeUsesOnly.insert({BranchBB, Succ}); + } + }; + + // Match combinations of conditions. + CmpInst::Predicate Pred; + bool isAnd = false; + bool isOr = false; + SmallVector<Value *, 8> CmpOperands; + if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()), + m_Cmp(Pred, m_Value(), m_Value()))) || + match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()), + m_Cmp(Pred, m_Value(), m_Value())))) { + auto *BinOp = cast<BinaryOperator>(BI->getCondition()); + if (BinOp->getOpcode() == Instruction::And) + isAnd = true; + else if (BinOp->getOpcode() == Instruction::Or) + isOr = true; + ConditionsToProcess.push_back(BinOp->getOperand(0)); + ConditionsToProcess.push_back(BinOp->getOperand(1)); + ConditionsToProcess.push_back(BI->getCondition()); + } else if (isa<CmpInst>(BI->getCondition())) { + ConditionsToProcess.push_back(BI->getCondition()); + } + for (auto Cond : ConditionsToProcess) { + if (auto *Cmp = dyn_cast<CmpInst>(Cond)) { + collectCmpOps(Cmp, CmpOperands); + // Now add our copy infos for our operands + for (auto *Op : CmpOperands) + InsertHelper(Op, isAnd, isOr, Cmp); + } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) { + // This must be an AND or an OR. + assert((BinOp->getOpcode() == Instruction::And || + BinOp->getOpcode() == Instruction::Or) && + "Should have been an AND or an OR"); + // The actual value of the binop is not subject to the same restrictions + // as the comparison. It's either true or false on the true/false branch. + InsertHelper(BinOp, false, false, BinOp); + } else { + llvm_unreachable("Unknown type of condition"); + } + CmpOperands.clear(); + } +} +// Process a block terminating switch, and place relevant operations to be +// renamed into OpsToRename. +void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB, + SmallPtrSetImpl<Value *> &OpsToRename) { + Value *Op = SI->getCondition(); + if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse()) + return; + + // Remember how many outgoing edges there are to every successor. + SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; + for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + BasicBlock *TargetBlock = SI->getSuccessor(i); + ++SwitchEdges[TargetBlock]; + } + + // Now propagate info for each case value + for (auto C : SI->cases()) { + BasicBlock *TargetBlock = C.getCaseSuccessor(); + if (SwitchEdges.lookup(TargetBlock) == 1) { + PredicateSwitch *PS = new PredicateSwitch( + Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI); + addInfoFor(OpsToRename, Op, PS); + if (!TargetBlock->getSinglePredecessor()) + EdgeUsesOnly.insert({BranchBB, TargetBlock}); + } + } +} + +// Build predicate info for our function +void PredicateInfo::buildPredicateInfo() { + DT.updateDFSNumbers(); + // Collect operands to rename from all conditional branch terminators, as well + // as assume statements. 
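+  // OpsToRename is a set because the same operand may appear in several
+  // comparisons; renameUses must process each operand exactly once.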
+ SmallPtrSet<Value *, 8> OpsToRename; + for (auto DTN : depth_first(DT.getRootNode())) { + BasicBlock *BranchBB = DTN->getBlock(); + if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) { + if (!BI->isConditional()) + continue; + // Can't insert conditional information if they all go to the same place. + if (BI->getSuccessor(0) == BI->getSuccessor(1)) + continue; + processBranch(BI, BranchBB, OpsToRename); + } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) { + processSwitch(SI, BranchBB, OpsToRename); + } + } + for (auto &Assume : AC.assumptions()) { + if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume)) + processAssume(II, II->getParent(), OpsToRename); + } + // Now rename all our operations. + renameUses(OpsToRename); +} + +// Given the renaming stack, make all the operands currently on the stack real +// by inserting them into the IR. Return the last operation's value. +Value *PredicateInfo::materializeStack(unsigned int &Counter, + ValueDFSStack &RenameStack, + Value *OrigOp) { + // Find the first thing we have to materialize + auto RevIter = RenameStack.rbegin(); + for (; RevIter != RenameStack.rend(); ++RevIter) + if (RevIter->Def) + break; + + size_t Start = RevIter - RenameStack.rbegin(); + // The maximum number of things we should be trying to materialize at once + // right now is 4, depending on if we had an assume, a branch, and both used + // and of conditions. + for (auto RenameIter = RenameStack.end() - Start; + RenameIter != RenameStack.end(); ++RenameIter) { + auto *Op = + RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def; + ValueDFS &Result = *RenameIter; + auto *ValInfo = Result.PInfo; + // For edge predicates, we can just place the operand in the block before + // the terminator. For assume, we have to place it right before the assume + // to ensure we dominate all of our uses. Always insert right before the + // relevant instruction (terminator, assume), so that we insert in proper + // order in the case of multiple predicateinfo in the same block. + if (isa<PredicateWithEdge>(ValInfo)) { + IRBuilder<> B(getBranchTerminator(ValInfo)); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::ssa_copy, Op->getType()); + CallInst *PIC = + B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); + PredicateMap.insert({PIC, ValInfo}); + Result.Def = PIC; + } else { + auto *PAssume = dyn_cast<PredicateAssume>(ValInfo); + assert(PAssume && + "Should not have gotten here without it being an assume"); + IRBuilder<> B(PAssume->AssumeInst); + Function *IF = Intrinsic::getDeclaration( + F.getParent(), Intrinsic::ssa_copy, Op->getType()); + CallInst *PIC = B.CreateCall(IF, Op); + PredicateMap.insert({PIC, ValInfo}); + Result.Def = PIC; + } + } + return RenameStack.back().Def; +} + +// Instead of the standard SSA renaming algorithm, which is O(Number of +// instructions), and walks the entire dominator tree, we walk only the defs + +// uses. The standard SSA renaming algorithm does not really rely on the +// dominator tree except to order the stack push/pops of the renaming stacks, so +// that defs end up getting pushed before hitting the correct uses. This does +// not require the dominator tree, only the *order* of the dominator tree. The +// complete and correct ordering of the defs and uses, in dominator tree is +// contained in the DFS numbering of the dominator tree. 
So we sort the defs and
+// uses into the DFS ordering, and then just use the renaming stack as per
+// normal, pushing when we hit a def (which is a predicateinfo instruction),
+// popping when we are out of the dfs scope for that def, and replacing any uses
+// with top of stack if it exists. In order to handle liveness without
+// propagating liveness info, we don't actually insert the predicateinfo
+// instruction def until we see a use that it would dominate. Once we see such
+// a use, we materialize the predicateinfo instruction in the right place and
+// use it.
+//
+// TODO: Use this algorithm to perform fast single-variable renaming in
+// promotememtoreg and memoryssa.
+void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
+  // Sort OpsToRename since we are going to iterate it.
+  SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
+  auto Comparator = [&](const Value *A, const Value *B) {
+    return valueComesBefore(OI, A, B);
+  };
+  std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);
+  ValueDFS_Compare Compare(OI);
+  // Compute liveness, and rename in O(uses) per Op.
+  for (auto *Op : OpsToRename) {
+    unsigned Counter = 0;
+    SmallVector<ValueDFS, 16> OrderedUses;
+    const auto &ValueInfo = getValueInfo(Op);
+    // Insert the possible copies into the def/use list.
+    // They will become real copies if we find a real use for them, and are
+    // never created otherwise.
+    for (auto &PossibleCopy : ValueInfo.Infos) {
+      ValueDFS VD;
+      // Determine where we are going to place the copy by the copy type.
+      // The predicate info for branches always comes first; it will get
+      // materialized in the split block at the top of the block.
+      // The predicate info for assumes will be somewhere in the middle,
+      // it will get materialized in front of the assume.
+      if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
+        VD.LocalNum = LN_Middle;
+        DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
+        if (!DomNode)
+          continue;
+        VD.DFSIn = DomNode->getDFSNumIn();
+        VD.DFSOut = DomNode->getDFSNumOut();
+        VD.PInfo = PossibleCopy;
+        OrderedUses.push_back(VD);
+      } else if (isa<PredicateWithEdge>(PossibleCopy)) {
+        // If we can only do phi uses, we treat it like it's in the branch
+        // block, and handle it specially. We know that it goes last, and only
+        // dominates phi uses.
+        auto BlockEdge = getBlockEdge(PossibleCopy);
+        if (EdgeUsesOnly.count(BlockEdge)) {
+          VD.LocalNum = LN_Last;
+          auto *DomNode = DT.getNode(BlockEdge.first);
+          if (DomNode) {
+            VD.DFSIn = DomNode->getDFSNumIn();
+            VD.DFSOut = DomNode->getDFSNumOut();
+            VD.PInfo = PossibleCopy;
+            VD.EdgeOnly = true;
+            OrderedUses.push_back(VD);
+          }
+        } else {
+          // Otherwise, we are in the split block (even though we perform
+          // insertion in the branch block).
+          // Insert a possible copy at the split block and before the branch.
+          VD.LocalNum = LN_First;
+          auto *DomNode = DT.getNode(BlockEdge.second);
+          if (DomNode) {
+            VD.DFSIn = DomNode->getDFSNumIn();
+            VD.DFSOut = DomNode->getDFSNumOut();
+            VD.PInfo = PossibleCopy;
+            OrderedUses.push_back(VD);
+          }
+        }
+      }
+    }
+
+    convertUsesToDFSOrdered(Op, OrderedUses);
+    std::sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+    SmallVector<ValueDFS, 8> RenameStack;
+    // For each use, sorted into dfs order, push values and replace uses with
+    // top of stack, which will represent the reaching def.
+    for (auto &VD : OrderedUses) {
+      // We currently do not materialize copy over copy, but we should decide
+      // if we want to.
+ bool PossibleCopy = VD.PInfo != nullptr; + if (RenameStack.empty()) { + DEBUG(dbgs() << "Rename Stack is empty\n"); + } else { + DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" + << RenameStack.back().DFSIn << "," + << RenameStack.back().DFSOut << ")\n"); + } + + DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," + << VD.DFSOut << ")\n"); + + bool ShouldPush = (VD.Def || PossibleCopy); + bool OutOfScope = !stackIsInScope(RenameStack, VD); + if (OutOfScope || ShouldPush) { + // Sync to our current scope. + popStackUntilDFSScope(RenameStack, VD); + if (ShouldPush) { + RenameStack.push_back(VD); + } + } + // If we get to this point, and the stack is empty we must have a use + // with no renaming needed, just skip it. + if (RenameStack.empty()) + continue; + // Skip values, only want to rename the uses + if (VD.Def || PossibleCopy) + continue; + if (!DebugCounter::shouldExecute(RenameCounter)) { + DEBUG(dbgs() << "Skipping execution due to debug counter\n"); + continue; + } + ValueDFS &Result = RenameStack.back(); + + // If the possible copy dominates something, materialize our stack up to + // this point. This ensures every comparison that affects our operation + // ends up with predicateinfo. + if (!Result.Def) + Result.Def = materializeStack(Counter, RenameStack, Op); + + DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " + << *VD.U->get() << " in " << *(VD.U->getUser()) << "\n"); + assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) && + "Predicateinfo def should have dominated this use"); + VD.U->set(Result.Def); + } + } +} + +PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) { + auto OIN = ValueInfoNums.find(Operand); + if (OIN == ValueInfoNums.end()) { + // This will grow it + ValueInfos.resize(ValueInfos.size() + 1); + // This will use the new size and give us a 0 based number of the info + auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1}); + assert(InsertResult.second && "Value info number already existed?"); + return ValueInfos[InsertResult.first->second]; + } + return ValueInfos[OIN->second]; +} + +const PredicateInfo::ValueInfo & +PredicateInfo::getValueInfo(Value *Operand) const { + auto OINI = ValueInfoNums.lookup(Operand); + assert(OINI != 0 && "Operand was not really in the Value Info Numbers"); + assert(OINI < ValueInfos.size() && + "Value Info Number greater than size of Value Info Table"); + return ValueInfos[OINI]; +} + +PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, + AssumptionCache &AC) + : F(F), DT(DT), AC(AC), OI(&DT) { + // Push an empty operand info so that we can detect 0 as not finding one + ValueInfos.resize(1); + buildPredicateInfo(); +} + +PredicateInfo::~PredicateInfo() {} + +void PredicateInfo::verifyPredicateInfo() const {} + +char PredicateInfoPrinterLegacyPass::ID = 0; + +PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass() + : FunctionPass(ID) { + initializePredicateInfoPrinterLegacyPassPass( + *PassRegistry::getPassRegistry()); +} + +void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); +} + +bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto PredInfo = make_unique<PredicateInfo>(F, DT, AC); + PredInfo->print(dbgs()); + if 
(VerifyPredicateInfo) + PredInfo->verifyPredicateInfo(); + return false; +} + +PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + OS << "PredicateInfo for function: " << F.getName() << "\n"; + make_unique<PredicateInfo>(F, DT, AC)->print(OS); + + return PreservedAnalyses::all(); +} + +/// \brief An assembly annotator class to print PredicateInfo information in +/// comments. +class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter { + friend class PredicateInfo; + const PredicateInfo *PredInfo; + +public: + PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) {} + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + if (const auto *PI = PredInfo->getPredicateInfoFor(I)) { + OS << "; Has predicate info\n"; + if (const auto *PB = dyn_cast<PredicateBranch>(PI)) { + OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge + << " Comparison:" << *PB->Condition << " Edge: ["; + PB->From->printAsOperand(OS); + OS << ","; + PB->To->printAsOperand(OS); + OS << "] }\n"; + } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) { + OS << "; switch predicate info { CaseValue: " << *PS->CaseValue + << " Switch:" << *PS->Switch << " Edge: ["; + PS->From->printAsOperand(OS); + OS << ","; + PS->To->printAsOperand(OS); + OS << "] }\n"; + } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) { + OS << "; assume predicate info {" + << " Comparison:" << *PA->Condition << " }\n"; + } + } + } +}; + +void PredicateInfo::print(raw_ostream &OS) const { + PredicateInfoAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +void PredicateInfo::dump() const { + PredicateInfoAnnotatedWriter Writer(this); + F.print(dbgs(), &Writer); +} + +PreservedAnalyses PredicateInfoVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo(); + + return PreservedAnalyses::all(); +} +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 35faa6f..cdba982 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -23,6 +22,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/ValueTracking.h" @@ -38,6 +38,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> using namespace llvm; @@ -224,13 +225,10 @@ struct PromoteMem2Reg { std::vector<AllocaInst *> Allocas; DominatorTree &DT; DIBuilder DIB; - - /// An AliasSetTracker object to update. If null, don't update it. 
- AliasSetTracker *AST; - /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; + const SimplifyQuery SQ; /// Reverse mapping of Allocas. DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -269,10 +267,11 @@ struct PromoteMem2Reg { public: PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - AliasSetTracker *AST, AssumptionCache *AC) + AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AST(AST), AC(AC) {} + AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + nullptr, &DT, AC) {} void run(); @@ -301,6 +300,18 @@ private: } // end of anonymous namespace +/// Given a LoadInst LI this adds assume(LI != null) after it. +static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { + Function *AssumeIntrinsic = + Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume); + ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI, + Constant::getNullValue(LI->getType())); + LoadNotNull->insertAfter(LI); + CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull}); + CI->insertAfter(LoadNotNull); + AC->registerAssumption(CI); +} + static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. @@ -334,9 +345,8 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI, - DominatorTree &DT, - AliasSetTracker *AST) { + LargeBlockInfo &LBI, DominatorTree &DT, + AssumptionCache *AC) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); @@ -387,9 +397,15 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // code. if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); + + // If the load was marked as nonnull we don't want to lose + // that information when we erase this Load. So we preserve + // it with an assume. + if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && + !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) + addAssumeNonNull(AC, LI); + LI->replaceAllUsesWith(ReplVal); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); } @@ -410,8 +426,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); - if (AST) - AST->deleteValue(AI); AI->eraseFromParent(); LBI.deleteValue(AI); return true; @@ -435,7 +449,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, /// } static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, - AliasSetTracker *AST) { + DominatorTree &DT, + AssumptionCache *AC) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. This does not // significantly pessimize the small block case. This uses LargeBlockInfo to @@ -476,13 +491,18 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // There is no store before this load, bail out (load may be affected // by the following stores - see main comment). 
      return false;
-    }
-    else
+    } else {
       // Otherwise, there was a store before this load; the load takes its value.
-      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+      // Note, if the load was marked as nonnull we don't want to lose that
+      // information when we erase it. So we preserve it with an assume.
+      Value *ReplVal = std::prev(I)->second->getOperand(0);
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
+      LI->replaceAllUsesWith(ReplVal);
+    }
 
-    if (AST && LI->getType()->isPointerTy())
-      AST->deleteValue(LI);
     LI->eraseFromParent();
     LBI.deleteValue(LI);
   }
@@ -499,8 +519,6 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
     LBI.deleteValue(SI);
   }
 
-  if (AST)
-    AST->deleteValue(AI);
   AI->eraseFromParent();
   LBI.deleteValue(AI);
 
@@ -517,8 +535,6 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
 void PromoteMem2Reg::run() {
   Function &F = *DT.getRoot()->getParent();
 
-  if (AST)
-    PointerAllocaValues.resize(Allocas.size());
   AllocaDbgDeclares.resize(Allocas.size());
 
   AllocaInfo Info;
@@ -536,8 +552,6 @@ void PromoteMem2Reg::run() {
 
     if (AI->use_empty()) {
       // If there are no uses of the alloca, just delete it now.
-      if (AST)
-        AST->deleteValue(AI);
       AI->eraseFromParent();
 
       // Remove the alloca from the Allocas list, since it has been processed
@@ -553,7 +567,7 @@ void PromoteMem2Reg::run() {
     // If there is only a single store to this value, replace any loads of
     // it that are directly dominated by the definition with the value stored.
     if (Info.DefiningBlocks.size() == 1) {
-      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) {
         // The alloca has been processed, move on.
         RemoveFromAllocasList(AllocaNum);
         ++NumSingleStore;
@@ -564,7 +578,7 @@ void PromoteMem2Reg::run() {
     // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
     if (Info.OnlyUsedInOneBlock &&
-        promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+        promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) {
       // The alloca has been processed, move on.
       RemoveFromAllocasList(AllocaNum);
       continue;
@@ -578,11 +592,6 @@ void PromoteMem2Reg::run() {
       BBNumbers[&BB] = ID++;
     }
 
-    // If we have an AST to keep updated, remember some pointer value that is
-    // stored into the alloca.
-    if (AST)
-      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
-
     // Remember the dbg.declare intrinsic describing this alloca, if any.
     if (Info.DbgDeclare)
       AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
@@ -662,13 +671,9 @@ void PromoteMem2Reg::run() {
     // tree. Just delete the users now.
     if (!A->use_empty())
       A->replaceAllUsesWith(UndefValue::get(A->getType()));
-    if (AST)
-      AST->deleteValue(A);
     A->eraseFromParent();
   }
 
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
   // Remove alloca's dbg.declare intrinsics from the function.
   for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
     if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
@@ -693,9 +698,7 @@ void PromoteMem2Reg::run() {
       PHINode *PN = I->second;
 
       // If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { - if (AST && PN->getType()->isPointerTy()) - AST->deleteValue(PN); + if (Value *V = SimplifyInstruction(PN, SQ)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); @@ -863,10 +866,6 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, &BB->front()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; - - if (AST && PN->getType()->isPointerTy()) - AST->copyValue(PointerAllocaValues[AllocaNo], PN); - return true; } @@ -940,10 +939,15 @@ NextIteration: Value *V = IncomingVals[AI->second]; + // If the load was marked as nonnull we don't want to lose + // that information when we erase this Load. So we preserve + // it with an assume. + if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && + !llvm::isKnownNonNullAt(V, LI, &DT)) + addAssumeNonNull(AC, LI); + // Anything using the load now uses the current value. LI->replaceAllUsesWith(V); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); BB->getInstList().erase(LI); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { // Delete this instruction and mark the name as the current holder of the @@ -987,10 +991,10 @@ NextIteration: } void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, - AliasSetTracker *AST, AssumptionCache *AC) { + AssumptionCache *AC) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, AST, AC).run(); + PromoteMem2Reg(Allocas, DT, AC).run(); } diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 8e93ee7..6ccf54e 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -13,18 +13,27 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" +#include <cassert> +#include <utility> using namespace llvm; @@ -36,7 +45,7 @@ static AvailableValsTy &getAvailableVals(void *AV) { } SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {} + : InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete static_cast<AvailableValsTy*>(AV); @@ -205,6 +214,7 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) { } namespace llvm { + template<> class SSAUpdaterTraits<SSAUpdater> { public: @@ -230,6 +240,7 @@ public: PHI_iterator &operator++() { ++idx; return *this; } bool operator==(const PHI_iterator& x) const { return idx == x.idx; } bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } }; @@ -303,7 +314,7 @@ public: } }; 
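As context for the traits specialization above, a typical SSAUpdater client looks roughly like this (an illustrative sketch, not code from this commit; the function and variable names are invented):

```cpp
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

// Rewrite all uses of OldVal, given the values that are live out of two
// defining blocks; SSAUpdater inserts PHI nodes at join points as needed.
static void rewriteUses(Instruction *OldVal, BasicBlock *LeftBB, Value *LeftV,
                        BasicBlock *RightBB, Value *RightV) {
  SSAUpdater Updater;
  Updater.Initialize(OldVal->getType(), OldVal->getName());
  Updater.AddAvailableValue(LeftBB, LeftV);
  Updater.AddAvailableValue(RightBB, RightV);
  while (!OldVal->use_empty())
    Updater.RewriteUse(*OldVal->use_begin());
}
```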
-} // End llvm namespace +} // end namespace llvm /// Check to see if AvailableVals has an entry for the specified BB and if so, /// return it. If not, construct SSA form by first calculating the required @@ -337,14 +348,12 @@ LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, SSA.Initialize(SomeVal->getType(), BaseName); } - void LoadAndStorePromoter:: run(const SmallVectorImpl<Instruction*> &Insts) const { - // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. - DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; + DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock; for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); diff --git a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp index 9afd175..8c23957 100644 --- a/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SanitizerStats.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SanitizerStats.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7b0bddb..8784b97 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -15,21 +15,22 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -54,11 +55,11 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -169,6 +170,8 @@ class SimplifyCFGOpt { unsigned BonusInstThreshold; AssumptionCache *AC; SmallPtrSetImpl<BasicBlock *> *LoopHeaders; + // See comments in SimplifyCFGOpt::SimplifySwitch. 
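+  // When true, transforms that are only desirable late in the pipeline (such
+  // as converting switches to lookup tables) are enabled.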
+ bool LateSimplifyCFG; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases( TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -192,9 +195,10 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, unsigned BonusInstThreshold, AssumptionCache *AC, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders) + SmallPtrSetImpl<BasicBlock *> *LoopHeaders, + bool LateSimplifyCFG) : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), - LoopHeaders(LoopHeaders) {} + LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {} bool run(BasicBlock *BB); }; @@ -591,7 +595,7 @@ private: Span = Span.inverse(); // If there are a ton of values, we don't want to make a ginormous switch. - if (Span.getSetSize().ugt(8) || Span.isEmptySet()) { + if (Span.isSizeLargerThan(8) || Span.isEmptySet()) { return false; } @@ -710,10 +714,9 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) { if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cases.reserve(SI->getNumCases()); - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; - ++i) - Cases.push_back( - ValueEqualityComparisonCase(i.getCaseValue(), i.getCaseSuccessor())); + for (auto Case : SI->cases()) + Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(), + Case.getCaseSuccessor())); return SI->getDefaultDest(); } @@ -846,12 +849,12 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( } for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; - if (DeadCases.count(i.getCaseValue())) { + if (DeadCases.count(i->getCaseValue())) { if (HasWeight) { - std::swap(Weights[i.getCaseIndex() + 1], Weights.back()); + std::swap(Weights[i->getCaseIndex() + 1], Weights.back()); Weights.pop_back(); } - i.getCaseSuccessor()->removePredecessor(TI->getParent()); + i->getCaseSuccessor()->removePredecessor(TI->getParent()); SI->removeCase(i); } } @@ -996,8 +999,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SmallSetVector<BasicBlock*, 4> FailBlocks; if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { for (auto *Succ : FailBlocks) { - std::vector<BasicBlock*> Blocks = { TI->getParent() }; - if (!SplitBlockPredecessors(Succ, Blocks, ".fold.split")) + if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split")) return false; } } @@ -1280,7 +1282,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, if (!isa<CallInst>(I1)) I1->setDebugLoc( DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc())); - + I2->eraseFromParent(); Changed = true; @@ -1373,53 +1375,6 @@ HoistTerminator: return true; } -// Is it legal to place a variable in operand \c OpIdx of \c I? -// FIXME: This should be promoted to Instruction. -static bool canReplaceOperandWithVariable(const Instruction *I, - unsigned OpIdx) { - // We can't have a PHI with a metadata type. - if (I->getOperand(OpIdx)->getType()->isMetadataTy()) - return false; - - // Early exit. - if (!isa<Constant>(I->getOperand(OpIdx))) - return true; - - switch (I->getOpcode()) { - default: - return true; - case Instruction::Call: - case Instruction::Invoke: - // FIXME: many arithmetic intrinsics have no issue taking a - // variable, however it's hard to distingish these from - // specials such as @llvm.frameaddress that require a constant. 
- if (isa<IntrinsicInst>(I)) - return false; - - // Constant bundle operands may need to retain their constant-ness for - // correctness. - if (ImmutableCallSite(I).isBundleOperand(OpIdx)) - return false; - - return true; - - case Instruction::ShuffleVector: - // Shufflevector masks are constant. - return OpIdx != 2; - case Instruction::ExtractValue: - case Instruction::InsertValue: - // All operands apart from the first are constant. - return OpIdx == 0; - case Instruction::Alloca: - return false; - case Instruction::GetElementPtr: - if (OpIdx == 0) - return true; - gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1); - return It.isSequential(); - } -} - // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -1472,29 +1427,28 @@ static bool canSinkInstructions( return false; } + // Because SROA can't handle speculating stores of selects, try not + // to sink loads or stores of allocas when we'd have to create a PHI for + // the address operand. Also, because it is likely that loads or stores + // of allocas will disappear when Mem2Reg/SROA is run, don't sink them. + // This can cause code churn which can have unintended consequences down + // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. + // FIXME: This is a workaround for a deficiency in SROA - see + // https://llvm.org/bugs/show_bug.cgi?id=30188 + if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(1)); + })) + return false; + if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) { + return isa<AllocaInst>(I->getOperand(0)); + })) + return false; + for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { if (I0->getOperand(OI)->getType()->isTokenTy()) // Don't touch any operand of token type. return false; - // Because SROA can't handle speculating stores of selects, try not - // to sink loads or stores of allocas when we'd have to create a PHI for - // the address operand. Also, because it is likely that loads or stores - // of allocas will disappear when Mem2Reg/SROA is run, don't sink them. - // This can cause code churn which can have unintended consequences down - // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. - // FIXME: This is a workaround for a deficiency in SROA - see - // https://llvm.org/bugs/show_bug.cgi?id=30188 - if (OI == 1 && isa<StoreInst>(I0) && - any_of(Insts, [](const Instruction *I) { - return isa<AllocaInst>(I->getOperand(1)); - })) - return false; - if (OI == 0 && isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) { - return isa<AllocaInst>(I->getOperand(0)); - })) - return false; - auto SameAsI0 = [&I0, OI](const Instruction *I) { assert(I->getNumOperands() == I0->getNumOperands()); return I->getOperand(OI) == I0->getOperand(OI); @@ -1546,7 +1500,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { })) return false; } - + // We don't need to do any more checking here; canSinkLastInstruction should // have done it all for us. 
SmallVector<Value*, 4> NewOperands; @@ -1653,7 +1607,7 @@ namespace { bool isValid() const { return !Fail; } - + void operator -- () { if (Fail) return; @@ -1699,7 +1653,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // / \ // [f(1)] [if] // | | \ - // | | \ + // | | | // | [f(2)]| // \ | / // [ end ] @@ -1737,7 +1691,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { } if (UnconditionalPreds.size() < 2) return false; - + bool Changed = false; // We take a two-step approach to tail sinking. First we scan from the end of // each block upwards in lockstep. If the n'th instruction from the end of each @@ -1767,7 +1721,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); if ((NumPHIdValues % UnconditionalPreds.size()) != 0) NumPHIInsts++; - + return NumPHIInsts <= 1; }; @@ -1790,7 +1744,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { } if (!Profitable) return false; - + DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. // Insert a new block postdominating all blocks we're going to sink from. @@ -1800,7 +1754,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return false; Changed = true; } - + // Now that we've analyzed all potential sinking candidates, perform the // actual sink. We iteratively sink the last non-terminator of the source // blocks into their common successor unless doing so would require too @@ -1826,7 +1780,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); break; } - + if (!sinkLastInstruction(UnconditionalPreds)) return Changed; NumSinkCommons++; @@ -2078,6 +2032,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, Value *S = Builder.CreateSelect( BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); SpeculatedStore->setOperand(0, S); + SpeculatedStore->setDebugLoc( + DILocation::getMergedLocation( + BI->getDebugLoc(), SpeculatedStore->getDebugLoc())); } // Metadata can be dependent on the condition we are hoisting above. @@ -2147,7 +2104,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// If we have a conditional branch on a PHI node value that is defined in the /// same block as the branch and if any PHI entries are constants, thread edges /// corresponding to that entry to be branches to their ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, + AssumptionCache *AC) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -2225,11 +2183,11 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { } // Check for trivial simplification. - if (Value *V = SimplifyInstruction(N, DL)) { + if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { - delete N; // Instruction folded away, don't need actual inst + N->deleteValue(); // Instruction folded away, don't need actual inst N = nullptr; } } else { @@ -2239,6 +2197,11 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { // Insert the new instruction into its new home. 
if (N) EdgeBB->getInstList().insert(InsertPt, N); + + // Register the new instruction with the assumption cache if necessary. + if (auto *II = dyn_cast_or_null<IntrinsicInst>(N)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); } // Loop over all of the edges from PredBB to BB, changing them to branch @@ -2251,7 +2214,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { } // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, DL) | true; + return FoldCondBranchOnPHI(BI, DL, AC) | true; } return false; @@ -2296,7 +2259,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, DL)) { + if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; @@ -3045,6 +3008,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { BasicBlock *QFB = QBI->getSuccessor(1); BasicBlock *PostBB = QFB->getSingleSuccessor(); + // Make sure we have a good guess for PostBB. If QTB's only successor is + // QFB, then QFB is a better PostBB. + if (QTB->getSingleSuccessor() == QFB) + PostBB = QFB; + + // If we couldn't find a good PostBB, stop. + if (!PostBB) + return false; + bool InvertPCond = false, InvertQCond = false; // Canonicalize fallthroughs to the true branches. if (PFB == QBI->getParent()) { @@ -3069,14 +3041,13 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; - if (!PostBB || - !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) return false; if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) return false; - if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2)) return false; // OK, this is a sequence of two diamonds or triangles. @@ -3433,8 +3404,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { // Find the relevant condition and destinations. Value *Condition = Select->getCondition(); - BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); - BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); + BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor(); + BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor(); // Get weight for TrueBB and FalseBB. 
uint32_t TrueWeight = 0, FalseWeight = 0; @@ -3444,9 +3415,9 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { GetBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { TrueWeight = - (uint32_t)Weights[SI->findCaseValue(TrueVal).getSuccessorIndex()]; + (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()]; FalseWeight = - (uint32_t)Weights[SI->findCaseValue(FalseVal).getSuccessorIndex()]; + (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()]; } } @@ -3526,7 +3497,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, DL)) { + if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } @@ -3736,7 +3707,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { if (!isa<DbgInfoIntrinsic>(I)) return false; - SmallSet<BasicBlock *, 4> TrivialUnwindBlocks; + SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; auto *PhiLPInst = cast<PHINode>(RI->getValue()); // Check incoming blocks to see if any of them are trivial. @@ -4148,15 +4119,16 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } else if (auto *SI = dyn_cast<SwitchInst>(TI)) { - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; - ++i) - if (i.getCaseSuccessor() == BB) { - BB->removePredecessor(SI->getParent()); - SI->removeCase(i); - --i; - --e; - Changed = true; + for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) { + if (i->getCaseSuccessor() != BB) { + ++i; + continue; } + BB->removePredecessor(SI->getParent()); + i = SI->removeCase(i); + e = SI->case_end(); + Changed = true; + } } else if (auto *II = dyn_cast<InvokeInst>(TI)) { if (II->getUnwindDest() == BB) { removeUnwindEdge(TI->getParent()); @@ -4239,18 +4211,18 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { SmallVector<ConstantInt *, 16> CasesA; SmallVector<ConstantInt *, 16> CasesB; - for (SwitchInst::CaseIt I : SI->cases()) { - BasicBlock *Dest = I.getCaseSuccessor(); + for (auto Case : SI->cases()) { + BasicBlock *Dest = Case.getCaseSuccessor(); if (!DestA) DestA = Dest; if (Dest == DestA) { - CasesA.push_back(I.getCaseValue()); + CasesA.push_back(Case.getCaseValue()); continue; } if (!DestB) DestB = Dest; if (Dest == DestB) { - CasesB.push_back(I.getCaseValue()); + CasesB.push_back(Case.getCaseValue()); continue; } return false; // More than two destinations. @@ -4348,8 +4320,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); - APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); + KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) @@ -4360,8 +4331,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, // Gather dead cases. 
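// The SimplifyUnreachable hunk above adopts the erase-returns-the-next-
// iterator discipline: removeCase now yields the iterator to the following
// case, and the end iterator is refreshed after each removal instead of
// being decremented in place. The same pattern with a std::vector stand-in:
#include <vector>

static void removeCasesTargeting(std::vector<int> &CaseSuccs, int BB) {
  for (auto i = CaseSuccs.begin(), e = CaseSuccs.end(); i != e;) {
    if (*i != BB) {
      ++i;
      continue;
    }
    i = CaseSuccs.erase(i); // i = SI->removeCase(i)
    e = CaseSuccs.end();    // e = SI->case_end()
  }
}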
SmallVector<ConstantInt *, 8> DeadCases; for (auto &Case : SI->cases()) { - APInt CaseVal = Case.getCaseValue()->getValue(); - if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne || + const APInt &CaseVal = Case.getCaseValue()->getValue(); + if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); @@ -4375,7 +4346,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, bool HasDefault = !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const unsigned NumUnknownBits = - Bits - (KnownZero.Or(KnownOne)).countPopulation(); + Bits - (Known.Zero | Known.One).countPopulation(); assert(NumUnknownBits <= Bits); if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && @@ -4400,17 +4371,17 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, // Remove dead cases from the switch. for (ConstantInt *DeadCase : DeadCases) { - SwitchInst::CaseIt Case = SI->findCaseValue(DeadCase); - assert(Case != SI->case_default() && + SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase); + assert(CaseI != SI->case_default() && "Case was not found. Probably mistake in DeadCases forming."); if (HasWeight) { - std::swap(Weights[Case.getCaseIndex() + 1], Weights.back()); + std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back()); Weights.pop_back(); } // Prune unused values from PHI nodes. - Case.getCaseSuccessor()->removePredecessor(SI->getParent()); - SI->removeCase(Case); + CaseI->getCaseSuccessor()->removePredecessor(SI->getParent()); + SI->removeCase(CaseI); } if (HasWeight && Weights.size() >= 2) { SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); @@ -4464,10 +4435,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap; ForwardingNodesMap ForwardingNodes; - for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; - ++I) { - ConstantInt *CaseValue = I.getCaseValue(); - BasicBlock *CaseDest = I.getCaseSuccessor(); + for (auto Case : SI->cases()) { + ConstantInt *CaseValue = Case.getCaseValue(); + BasicBlock *CaseDest = Case.getCaseSuccessor(); int PhiIndex; PHINode *PHI = @@ -4811,7 +4781,7 @@ public: SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL); + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); /// Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. 
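// The dead-case test in the hunk above reads naturally once spelled out on
// plain masks: a case constant is unreachable if it sets a bit the switch
// condition is known to have clear, or clears a bit known to be set. A
// standalone 64-bit rendition (Zero/One mirror KnownBits' fields):
#include <cstdint>

struct KnownMasks {
  uint64_t Zero = 0; // bits proven 0 in the switch condition
  uint64_t One = 0;  // bits proven 1 in the switch condition
};

static bool caseIsDead(KnownMasks K, uint64_t CaseVal) {
  return (K.Zero & CaseVal) != 0 || // Known.Zero.intersects(CaseVal)
         (K.One & ~CaseVal) != 0;   // !Known.One.isSubsetOf(CaseVal)
}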
@@ -4865,7 +4835,7 @@ private: SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL) + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); @@ -4927,7 +4897,7 @@ SwitchLookupTable::SwitchLookupTable( LinearMappingPossible = false; break; } - APInt Val = ConstVal->getValue(); + const APInt &Val = ConstVal->getValue(); if (I != 0) { APInt Dist = Val - PrevVal; if (I == 1) { @@ -4973,7 +4943,7 @@ SwitchLookupTable::SwitchLookupTable( Array = new GlobalVariable(M, ArrayTy, /*constant=*/true, GlobalVariable::PrivateLinkage, Initializer, - "switch.table"); + "switch.table." + FuncName); Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); Kind = ArrayKind; } @@ -5202,8 +5172,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // common destination, as well as the min and max case values. assert(SI->case_begin() != SI->case_end()); SwitchInst::CaseIt CI = SI->case_begin(); - ConstantInt *MinCaseVal = CI.getCaseValue(); - ConstantInt *MaxCaseVal = CI.getCaseValue(); + ConstantInt *MinCaseVal = CI->getCaseValue(); + ConstantInt *MaxCaseVal = CI->getCaseValue(); BasicBlock *CommonDest = nullptr; typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy; @@ -5213,7 +5183,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, SmallVector<PHINode *, 4> PHIs; for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { - ConstantInt *CaseVal = CI.getCaseValue(); + ConstantInt *CaseVal = CI->getCaseValue(); if (CaseVal->getValue().slt(MinCaseVal->getValue())) MinCaseVal = CaseVal; if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) @@ -5222,7 +5192,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Resulting value at phi nodes for this case value. typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy; ResultsTy Results; - if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, + if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, Results, DL, TTI)) return false; @@ -5363,7 +5333,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? 
ResultLists[PHI][0].second : DefaultResults[PHI];
- SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL);
+ StringRef FuncName = SI->getParent()->getParent()->getName();
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
+ FuncName);
Value *Result = Table.BuildLookup(TableIndex, Builder);
@@ -5503,11 +5475,10 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
auto *Rot = Builder.CreateOr(LShr, Shl);
SI->replaceUsesOfWith(SI->getCondition(), Rot);
- for (SwitchInst::CaseIt C = SI->case_begin(), E = SI->case_end(); C != E;
- ++C) {
- auto *Orig = C.getCaseValue();
+ for (auto Case : SI->cases()) {
+ auto *Orig = Case.getCaseValue();
auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
- C.setValue(
+ Case.setValue(
cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
}
return true;
@@ -5553,7 +5524,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToLookupTable(SI, Builder, DL, TTI))
+ // The conversion from switch to lookup tables results in difficult
+ // to analyze code and makes pruning branches much harder.
+ // This is a problem if the switch expression itself can still be
+ // restricted as a result of inlining or CVP. Therefore, only apply this
+ // transformation during late steps of the optimisation chain.
+ if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
if (ReduceSwitchRange(SI, Builder, DL, TTI))
@@ -5680,20 +5656,22 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
if (SinkCommon && SinkThenElseCodeToEnd(BI))
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
- // if LoopHeader is provided, check if the block is a loop header
- // (This is for early invocations before loop simplify and vectorization
- // to keep canonical loop forms for nested loops.
- // These blocks can be eliminated when the pass is invoked later
- // in the back-end.)
+ // If LoopHeader is provided, check if the block or its successor is a loop
+ // header (This is for early invocations before loop simplify and
+ // vectorization to keep canonical loop forms for nested loops. These blocks
+ // can be eliminated when the pass is invoked later in the back-end.) 
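// The arithmetic behind the ReduceSwitchRange hunk above, as a standalone
// 64-bit sketch: the pass picks Base and Shift (their selection is outside
// the hunk shown) so that every case value minus Base has its low Shift
// bits clear, rewrites the condition to a rotate, and remaps each case to
// match. Names here are assumed, not LLVM's:
#include <cstdint>

static uint64_t rotr64(uint64_t V, unsigned S) {
  return S == 0 ? V : (V >> S) | (V << (64 - S));
}

static uint64_t remapCondition(uint64_t Cond, uint64_t Base, unsigned Shift) {
  return rotr64(Cond - Base, Shift); // the CreateLShr/CreateShl/CreateOr trio
}

static uint64_t remapCase(uint64_t Orig, uint64_t Base, unsigned Shift) {
  return (Orig - Base) >> Shift; // Sub.lshr(ShiftC->getValue())
}

// A condition whose (Cond - Base) has nonzero low bits rotates them into
// the high bits, where no remapped case can match, so it falls through to
// the default destination as before.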
+ bool NeedCanonicalLoop = + !LateSimplifyCFG && + (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - (!LoopHeaders || !LoopHeaders->count(BB)) && - TryToSimplifyUncondBranchFromEmptyBlock(BB)) + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; // If the only instruction in the block is a seteq/setne comparison @@ -5778,8 +5756,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *Dom = BB->getSinglePredecessor()) { auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator()); if (PBI && PBI->isConditional() && - PBI->getSuccessor(0) != PBI->getSuccessor(1) && - (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) { + PBI->getSuccessor(0) != PBI->getSuccessor(1)) { + assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB); bool CondIsFalse = PBI->getSuccessor(1) == BB; Optional<bool> Implication = isImpliedCondition( PBI->getCondition(), BI->getCondition(), DL, CondIsFalse); @@ -5833,7 +5811,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL)) + if (FoldCondBranchOnPHI(BI, DL, AC)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Scan predecessor blocks for conditional branches. @@ -6012,8 +5990,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, unsigned BonusInstThreshold, AssumptionCache *AC, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders) { + SmallPtrSetImpl<BasicBlock *> *LoopHeaders, + bool LateSimplifyCFG) { return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, AC, LoopHeaders) + BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG) .run(BB); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 6b1d3dc..6d90e6b 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -35,6 +36,9 @@ using namespace llvm; STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC( + NumSimplifiedSDiv, + "Number of IV signed division operations converted to unsigned division"); STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); namespace { @@ -48,13 +52,13 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead) + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -75,7 +79,9 @@ namespace { void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); 
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+ bool eliminateSDiv(BinaryOperator *SDiv);
bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
+ bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
};
}
@@ -150,6 +156,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
+ ICmpInst::Predicate OriginalPred = Pred;
if (IVOperand != ICmp->getOperand(0)) {
// Swapped
assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
@@ -258,6 +265,16 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
ICmp->setPredicate(InvariantPredicate);
ICmp->setOperand(0, NewLHS);
ICmp->setOperand(1, NewRHS);
+ } else if (ICmpInst::isSigned(OriginalPred) &&
+ SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+ // If we were unable to make anything above, all we can do is canonicalize
+ // the comparison hoping that it will open the doors for other
+ // optimizations. If we find out that we compare two non-negative values,
+ // we turn the instruction's predicate into its unsigned version. Note that
+ // we cannot rely on Pred here unless we check if we have swapped it.
+ assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+ DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
} else
return;
@@ -265,6 +282,33 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
Changed = true;
}
+bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
+ // Get the SCEVs for the SDiv operands.
+ auto *N = SE->getSCEV(SDiv->getOperand(0));
+ auto *D = SE->getSCEV(SDiv->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *L = LI->getLoopFor(SDiv->getParent());
+ N = SE->getSCEVAtScope(N, L);
+ D = SE->getSCEVAtScope(D, L);
+
+ // Replace sdiv by udiv if both of the operands are non-negative.
+ if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
+ auto *UDiv = BinaryOperator::Create(
+ BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
+ SDiv->getName() + ".udiv", SDiv);
+ UDiv->setIsExact(SDiv->isExact());
+ SDiv->replaceAllUsesWith(UDiv);
+ DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
+ ++NumSimplifiedSDiv;
+ Changed = true;
+ DeadInsts.push_back(SDiv);
+ return true;
+ }
+
+ return false;
+}
+
/// SimplifyIVUsers helper for eliminating useless
/// remainder operations operating on an induction variable. 
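// eliminateSDiv above rests on a simple fact: signed and unsigned division
// agree whenever both operands are non-negative. SCEV proves that
// symbolically; the concrete-value analogue of the rewrite looks like this:
#include <cassert>
#include <cstdint>

static int64_t divideAsThePassWould(int64_t N, int64_t D) {
  assert(D != 0 && "division by zero");
  if (N >= 0 && D >= 0) // what isKnownNonNegative establishes
    return (int64_t)((uint64_t)N / (uint64_t)D); // udiv replacement
  return N / D; // otherwise the sdiv must stay
}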
void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, @@ -321,9 +365,9 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { return false; typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)( - const SCEV *, const SCEV *, SCEV::NoWrapFlags); + const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned); typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)( - const SCEV *, Type *); + const SCEV *, Type *, unsigned); OperationFunctionTy Operation; ExtensionFunctionTy Extension; @@ -375,10 +419,11 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); const SCEV *A = - (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy); + (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), + WideTy, 0); const SCEV *B = - (SE->*Operation)((SE->*Extension)(LHS, WideTy), - (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap); + (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0), + (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0); if (A != B) return false; @@ -426,12 +471,15 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, eliminateIVComparison(ICmp, IVOperand); return true; } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - eliminateIVRemainder(Rem, IVOperand, IsSigned); + if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSRem = Bin->getOpcode() == Instruction::SRem; + if (IsSRem || Bin->getOpcode() == Instruction::URem) { + eliminateIVRemainder(Bin, IVOperand, IsSRem); return true; } + + if (Bin->getOpcode() == Instruction::SDiv) + return eliminateSDiv(Bin); } if (auto *CI = dyn_cast<CallInst>(UseInst)) @@ -496,8 +544,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, return false; const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, - SCEV::NoWrapFlags); - + SCEV::NoWrapFlags, unsigned); switch (BO->getOpcode()) { default: return false; @@ -526,7 +573,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); const SCEV *OpAfterExtend = (SE->*GetExprForBO)( SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap); + SCEV::FlagAnyWrap, 0u); if (ExtendAfterOp == OpAfterExtend) { BO->setHasNoUnsignedWrap(); SE->forgetValue(BO); @@ -538,7 +585,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); const SCEV *OpAfterExtend = (SE->*GetExprForBO)( SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap); + SCEV::FlagAnyWrap, 0u); if (ExtendAfterOp == OpAfterExtend) { BO->setHasNoSignedWrap(); SE->forgetValue(BO); @@ -549,6 +596,35 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, return Changed; } +/// Annotate the Shr in (X << IVOperand) >> C as exact using the +/// information from the IV's range. Returns true if anything changed, false +/// otherwise. 
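// strengthenOverflowingOperation's criterion above: an operation cannot
// wrap iff extending the result equals performing the operation on
// extended inputs. SCEV checks this symbolically in a twice-widened type;
// the same identity on concrete 32-bit values, for the unsigned-add case:
#include <cstdint>

static bool addProvablyNoUnsignedWrap(uint32_t A, uint32_t B) {
  uint64_t ExtendAfterOp = (uint64_t)(uint32_t)(A + B); // zext(a + b)
  uint64_t OpAfterExtend = (uint64_t)A + (uint64_t)B;   // zext(a) + zext(b)
  return ExtendAfterOp == OpAfterExtend; // equal => the add did not wrap
}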
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, + Value *IVOperand) { + using namespace llvm::PatternMatch; + + if (BO->getOpcode() == Instruction::Shl) { + bool Changed = false; + ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand)); + for (auto *U : BO->users()) { + const APInt *C; + if (match(U, + m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) || + match(U, + m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) { + BinaryOperator *Shr = cast<BinaryOperator>(U); + if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) { + Shr->setIsExact(true); + Changed = true; + } + } + } + return Changed; + } + + return false; +} + /// Add all uses of Def to the current IV's worklist. static void pushIVUsers( Instruction *Def, @@ -641,8 +717,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { } if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { - if (isa<OverflowingBinaryOperator>(BO) && - strengthenOverflowingOperation(BO, IVOperand)) { + if ((isa<OverflowingBinaryOperator>(BO) && + strengthenOverflowingOperation(BO, IVOperand)) || + (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. pushIVUsers(IVOperand, Simplified, SimpleIVUsers); @@ -667,7 +744,7 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead, IVVisitor *V) { SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); @@ -677,7 +754,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. 
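// Why strengthenRightShift above may mark (X << IV) >> C as exact: when the
// IV's unsigned minimum is at least C, the shl deposited at least C low
// zero bits, so the right shift drops only zeros; that is precisely what
// the 'exact' flag asserts. A standalone spot check of that property:
#include <cassert>
#include <cstdint>

static void exactShiftDemo() {
  for (uint64_t X : {1ull, 0x5aull, 0xffffull})
    for (unsigned S = 3; S < 8; ++S)    // shift amounts the IV can take
      for (unsigned C = 0; C <= 3; ++C) // C <= umin(IV), as the pass requires
        assert((((X << S) >> C) << C) == (X << S)); // no bits were lost
}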
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) { + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index 1220490..2ea15f6 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -20,23 +20,23 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instsimplify" STATISTIC(NumSimplified, "Number of redundant instructions removed"); -static bool runImpl(Function &F, const DominatorTree *DT, - const TargetLibraryInfo *TLI, AssumptionCache *AC) { - const DataLayout &DL = F.getParent()->getDataLayout(); +static bool runImpl(Function &F, const SimplifyQuery &SQ, + OptimizationRemarkEmitter *ORE) { SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -54,7 +54,7 @@ static bool runImpl(Function &F, const DominatorTree *DT, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + if (Value *V = SimplifyInstruction(I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast<Instruction>(U)); @@ -63,7 +63,7 @@ static bool runImpl(Function &F, const DominatorTree *DT, Changed = true; } } - if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) { + if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { // RecursivelyDeleteTriviallyDeadInstruction can remove more than one // instruction, so simply incrementing the iterator does not work. // When instructions get deleted re-iterate instead. @@ -95,6 +95,7 @@ namespace { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); } /// runOnFunction - Remove instructions that simplify. 
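// The instsimplify driver above (runImpl) revisits only what might have
// changed: two sets are swapped each round, and simplifying an instruction
// queues its users for the next round. A sketch of just that worklist
// discipline (the real pass seeds the first round with every instruction):
#include <set>
#include <utility>

template <typename T, typename SimplifyFn>
void sweepToFixpoint(std::set<T> ToDo, SimplifyFn simplifyInto) {
  std::set<T> S1 = std::move(ToDo), S2;
  std::set<T> *ToSimplify = &S1, *Next = &S2;
  while (!ToSimplify->empty()) {
    for (const T &I : *ToSimplify)
      simplifyInto(I, *Next); // on success, inserts I's users into *Next
    ToSimplify->clear();
    std::swap(ToSimplify, Next);
  }
}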
@@ -108,7 +109,11 @@ namespace { &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - return runImpl(F, DT, TLI, AC); + OptimizationRemarkEmitter *ORE = + &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, TLI, DT, AC); + return runImpl(F, SQ, ORE); } }; } @@ -119,6 +124,7 @@ INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(InstSimplifier, "instsimplify", "Remove redundant instructions", false, false) char &llvm::InstructionSimplifierID = InstSimplifier::ID; @@ -133,9 +139,14 @@ PreservedAnalyses InstSimplifierPass::run(Function &F, auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); - bool Changed = runImpl(F, &DT, &TLI, &AC); + auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, &TLI, &DT, &AC); + bool Changed = runImpl(F, SQ, &ORE); if (!Changed) return PreservedAnalyses::all(); - // FIXME: This should also 'preserve the CFG'. - return PreservedAnalyses::none(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8eaeb10..77c0a41 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -37,10 +38,6 @@ using namespace llvm; using namespace PatternMatch; static cl::opt<bool> - ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden, - cl::desc("Treat error-reporting calls as cold")); - -static cl::opt<bool> EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden, cl::init(false), cl::desc("Enable unsafe double to float " @@ -51,9 +48,9 @@ static cl::opt<bool> // Helper Functions //===----------------------------------------------------------------------===// -static bool ignoreCallingConv(LibFunc::Func Func) { - return Func == LibFunc::abs || Func == LibFunc::labs || - Func == LibFunc::llabs || Func == LibFunc::strlen; +static bool ignoreCallingConv(LibFunc Func) { + return Func == LibFunc_abs || Func == LibFunc_labs || + Func == LibFunc_llabs || Func == LibFunc_strlen; } static bool isCallingConvCCompatible(CallInst *CI) { @@ -88,20 +85,6 @@ static bool isCallingConvCCompatible(CallInst *CI) { return false; } -/// Return true if it only matters that the value is equal or not-equal to zero. -static bool isOnlyUsedInZeroEqualityComparison(Value *V) { - for (User *U : V->users()) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) - if (IC->isEquality()) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - /// Return true if it is only used in equality comparisons with With. 
static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { for (User *U : V->users()) { @@ -123,8 +106,8 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { /// \brief Check whether the overloaded unary floating point function /// corresponding to \a Ty is available. static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc::Func DoubleFn, LibFunc::Func FloatFn, - LibFunc::Func LongDoubleFn) { + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn) { switch (Ty->getTypeID()) { case Type::FloatTyID: return TLI->has(FloatFn); @@ -429,59 +412,68 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { return Dst; } -Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { +Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, + unsigned CharSize) { Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 - if (uint64_t Len = GetStringLength(Src)) + if (uint64_t Len = GetStringLength(Src, CharSize)) return ConstantInt::get(CI->getType(), Len - 1); // If s is a constant pointer pointing to a string literal, we can fold - // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // strlen(s + x) to strlen(s) - x, when x is known to be in the range // [0, strlen(s)] or the string has a single null terminator '\0' at the end. - // We only try to simplify strlen when the pointer s points to an array + // We only try to simplify strlen when the pointer s points to an array // of i8. Otherwise, we would need to scale the offset x before doing the - // subtraction. This will make the optimization more complex, and it's not - // very useful because calling strlen for a pointer of other types is + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is // very uncommon. if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) { - if (!isGEPBasedOnPointerToString(GEP)) + if (!isGEPBasedOnPointerToString(GEP, CharSize)) return nullptr; - StringRef Str; - if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { - size_t NullTermIdx = Str.find('\0'); - - // If the string does not have '\0', leave it to strlen to compute - // its length. - if (NullTermIdx == StringRef::npos) - return nullptr; - + ConstantDataArraySlice Slice; + if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) { + uint64_t NullTermIdx; + if (Slice.Array == nullptr) { + NullTermIdx = 0; + } else { + NullTermIdx = ~((uint64_t)0); + for (uint64_t I = 0, E = Slice.Length; I < E; ++I) { + if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) { + NullTermIdx = I; + break; + } + } + // If the string does not have '\0', leave it to strlen to compute + // its length. 
+ if (NullTermIdx == ~((uint64_t)0))
+ return nullptr;
+ }
+
Value *Offset = GEP->getOperand(2);
- unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI,
- nullptr);
- KnownZero.flipAllBits();
- size_t ArrSize =
+ KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
+ Known.Zero.flipAllBits();
+ uint64_t ArrSize =
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
- // KnownZero's bits are flipped, so zeros in KnownZero now represent
- // bits known to be zeros in Offset, and ones in KnowZero represent
+ // KnownZero's bits are flipped, so zeros in KnownZero now represent
+ // bits known to be zeros in Offset, and ones in KnownZero represent
// bits unknown in Offset. Therefore, Offset is known to be in range
- // [0, NullTermIdx] when the flipped KnownZero is non-negative and
+ // [0, NullTermIdx] when the flipped KnownZero is non-negative and
// unsigned-less-than NullTermIdx.
//
- // If Offset is not provably in the range [0, NullTermIdx], we can still
- // optimize if we can prove that the program has undefined behavior when
- // Offset is outside that range. That is the case when GEP->getOperand(0)
+ // If Offset is not provably in the range [0, NullTermIdx], we can still
+ // optimize if we can prove that the program has undefined behavior when
+ // Offset is outside that range. That is the case when GEP->getOperand(0)
// is a pointer to an object whose memory extent is NullTermIdx+1.
- if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) ||
+ if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
(GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
- NullTermIdx == ArrSize - 1))
- return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
+ NullTermIdx == ArrSize - 1)) {
+ Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
+ return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
Offset);
+ }
}
return nullptr;
@@ -489,8 +481,8 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
// strlen(x?"foo":"bars") --> x ? 3 : 4
if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
- uint64_t LenTrue = GetStringLength(SI->getTrueValue());
- uint64_t LenFalse = GetStringLength(SI->getFalseValue());
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
if (LenTrue && LenFalse) {
Function *Caller = CI->getParent()->getParent();
emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller,
@@ -510,6 +502,17 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+ return optimizeStringLength(CI, B, 8);
+}
+
+Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) {
+ Module &M = *CI->getParent()->getParent()->getParent();
+ unsigned WCharSize = TLI->getWCharSize(M) * 8;
+
+ return optimizeStringLength(CI, B, WCharSize);
+}
+
Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -542,7 +545,7 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
// It would be readonly too, except that it still may write to errno. 
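// The flipped-KnownZero trick above, restated on plain 64-bit masks: after
// flipping, the mask has a 1 in every bit that may be set in Offset, so it
// is an unsigned upper bound on Offset; if its sign bit is clear and the
// bound is <= NullTermIdx, then 0 <= Offset <= NullTermIdx and the
// strlen(s + x) -> strlen(s) - x fold is safe:
#include <cstdint>

static bool offsetProvablyInRange(uint64_t KnownZeroMask,
                                  uint64_t NullTermIdx) {
  uint64_t MaxPossible = ~KnownZeroMask;        // Known.Zero.flipAllBits()
  bool NonNegative = (int64_t)MaxPossible >= 0; // sign bit known clear
  return NonNegative && MaxPossible <= NullTermIdx;
}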
- CI->addAttribute(1, Attribute::NoCapture); + CI->addParamAttr(0, Attribute::NoCapture); } return nullptr; @@ -653,7 +656,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); // memchr(x, y, 0) -> null - if (LenC && LenC->isNullValue()) + if (LenC && LenC->isZero()) return Constant::getNullValue(CI->getType()); // From now on we need at least constant length and string. @@ -735,8 +738,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); if (!LenC) return nullptr; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); @@ -809,9 +812,9 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { // TODO: Does this belong in BuildLibCalls or should all of those similar // functions be moved here? -static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs, +static Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs, IRBuilder<> &B, const TargetLibraryInfo &TLI) { - LibFunc::Func Func; + LibFunc Func; if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func)) return nullptr; @@ -819,7 +822,7 @@ static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs, const DataLayout &DL = M->getDataLayout(); IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(), - PtrType, PtrType, nullptr); + PtrType, PtrType); CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc"); if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts())) @@ -846,9 +849,12 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, // Is the inner call really malloc()? Function *InnerCallee = Malloc->getCalledFunction(); - LibFunc::Func Func; + if (!InnerCallee) + return nullptr; + + LibFunc Func; if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || - Func != LibFunc::malloc) + Func != LibFunc_malloc) return nullptr; // The memset must cover the same number of bytes that are malloc'd. @@ -930,6 +936,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, if (V == nullptr) return nullptr; + // If call isn't an intrinsic, check that it isn't within a function with the + // same name as the float version of this call. + // + // e.g. inline float expf(float val) { return (float) exp((double) val); } + // + // A similar such definition exists in the MinGW-w64 math.h header file which + // when compiled with -O2 -ffast-math causes the generation of infinite loops + // where expf is called. + if (!Callee->isIntrinsic()) { + const Function *F = CI->getFunction(); + StringRef FName = F->getName(); + StringRef CalleeName = Callee->getName(); + if ((FName.size() == (CalleeName.size() + 1)) && + (FName.back() == 'f') && + FName.startswith(CalleeName)) + return nullptr; + } + // Propagate fast-math flags from the existing call to the new call. 
IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); @@ -948,6 +972,20 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, return B.CreateFPExt(V, B.getDoubleTy()); } +// Replace a libcall \p CI with a call to intrinsic \p IID +static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) { + // Propagate fast-math flags from the existing call to the new call. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + Module *M = CI->getModule(); + Value *V = CI->getArgOperand(0); + Function *F = Intrinsic::getDeclaration(M, IID, CI->getType()); + CallInst *NewCall = B.CreateCall(F, V); + NewCall->takeName(CI); + return NewCall; +} + /// Shrink double -> float for binary functions like 'fmin/fmax'. static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); @@ -1041,9 +1079,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { // pow(10.0, x) -> exp10(x) if (Op1C->isExactlyValue(10.0) && - hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, - LibFunc::exp10l)) - return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B, + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f, + LibFunc_exp10l)) + return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B, Callee->getAttributes()); } @@ -1055,10 +1093,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). auto *OpC = dyn_cast<CallInst>(Op1); if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { - LibFunc::Func Func; + LibFunc Func; Function *OpCCallee = OpC->getCalledFunction(); if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && - TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) { + TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); @@ -1075,17 +1113,20 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return ConstantFP::get(CI->getType(), 1.0); if (Op2C->isExactlyValue(-0.5) && - hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, - LibFunc::sqrtl)) { + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, + LibFunc_sqrtl)) { // If -ffast-math: // pow(x, -0.5) -> 1.0 / sqrt(x) if (CI->hasUnsafeAlgebra()) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); - // Here we cannot lower to an intrinsic because C99 sqrt() and llvm.sqrt - // are not guaranteed to have the same semantics. - Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + // TODO: If the pow call is an intrinsic, we should lower to the sqrt + // intrinsic, so we match errno semantics. We also should check that the + // target can in fact lower the sqrt intrinsic -- we currently have no way + // to ask this question other than asking whether the target has a sqrt + // libcall, which is a sufficient but not necessary condition. 
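// The name test added to optimizeUnaryDoubleFP above guards against
// shrinking a double call that sits inside its own float wrapper, e.g.
// expf defined as (float)exp((double)x); rewriting exp -> expf there would
// make expf call itself. The same test, restated with std::string:
#include <string>

static bool isFloatWrapperOfCallee(const std::string &FName,
                                   const std::string &CalleeName) {
  return FName.size() == CalleeName.size() + 1 && FName.back() == 'f' &&
         FName.compare(0, CalleeName.size(), CalleeName) == 0;
}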
+ Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B, Callee->getAttributes()); return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip"); @@ -1093,19 +1134,17 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { } if (Op2C->isExactlyValue(0.5) && - hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, - LibFunc::sqrtl) && - hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, - LibFunc::fabsl)) { + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, + LibFunc_sqrtl)) { // In -ffast-math, pow(x, 0.5) -> sqrt(x). if (CI->hasUnsafeAlgebra()) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); - // Unlike other math intrinsics, sqrt has differerent semantics - // from the libc function. See LangRef for details. - return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + // TODO: As above, we should lower to the sqrt intrinsic if the pow is an + // intrinsic, to match errno semantics. + return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B, Callee->getAttributes()); } @@ -1115,9 +1154,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // TODO: In finite-only mode, this could be just fabs(sqrt(x)). Value *Inf = ConstantFP::getInfinity(CI->getType()); Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + + // TODO: As above, we should lower to the sqrt intrinsic if the pow is an + // intrinsic, to match errno semantics. Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); - Value *FAbs = - emitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes()); + + Module *M = Callee->getParent(); + Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs, + CI->getType()); + Value *FAbs = B.CreateCall(FabsF, Sqrt); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); return Sel; @@ -1173,11 +1219,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 - LibFunc::Func LdExp = LibFunc::ldexpl; + LibFunc LdExp = LibFunc_ldexpl; if (Op->getType()->isFloatTy()) - LdExp = LibFunc::ldexpf; + LdExp = LibFunc_ldexpf; else if (Op->getType()->isDoubleTy()) - LdExp = LibFunc::ldexp; + LdExp = LibFunc_ldexp; if (TLI->has(LdExp)) { Value *LdExpArg = nullptr; @@ -1197,7 +1243,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Module *M = CI->getModule(); Value *NewCallee = M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), - Op->getType(), B.getInt32Ty(), nullptr); + Op->getType(), B.getInt32Ty()); CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1208,15 +1254,6 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { return Ret; } -Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - if (Name == "fabs" && hasFloatVersion(Name)) - return optimizeUnaryDoubleFP(CI, B, false); - - return nullptr; -} - Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); // If we can shrink the call to a float function rather than a double @@ -1280,17 +1317,17 @@ 
Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
FMF.setUnsafeAlgebra();
B.setFastMathFlags(FMF);
- LibFunc::Func Func;
+ LibFunc Func;
Function *F = OpC->getCalledFunction();
if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow))
+ Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
return B.CreateFMul(OpC->getArgOperand(1),
emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
Callee->getAttributes()), "mul");
// log(exp2(y)) -> y*log(2)
if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
- TLI->has(Func) && Func == LibFunc::exp2)
+ TLI->has(Func) && Func == LibFunc_exp2)
return B.CreateFMul(
OpC->getArgOperand(0),
emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
@@ -1302,8 +1339,11 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
- if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
- Callee->getIntrinsicID() == Intrinsic::sqrt))
+ // TODO: Once we have a way (other than checking for the existence of the
+ // libcall) to tell whether our target can lower @llvm.sqrt, relax the
+ // condition below.
+ if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
+ Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
if (!CI->hasUnsafeAlgebra())
@@ -1385,12 +1425,12 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
// tan(atan(x)) -> x
// tanf(atanf(x)) -> x
// tanl(atanl(x)) -> x
- LibFunc::Func Func;
+ LibFunc Func;
Function *F = OpC->getCalledFunction();
if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- ((Func == LibFunc::atan && Callee->getName() == "tan") ||
- (Func == LibFunc::atanf && Callee->getName() == "tanf") ||
- (Func == LibFunc::atanl && Callee->getName() == "tanl")))
+ ((Func == LibFunc_atan && Callee->getName() == "tan") ||
+ (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
+ (Func == LibFunc_atanl && Callee->getName() == "tanl")))
Ret = OpC->getArgOperand(0);
return Ret;
}
@@ -1418,16 +1458,16 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
// x86_64 can't use {float, float} since that would be returned in both
// xmm0 and xmm1, which isn't what a real struct would do.
ResTy = T.getArch() == Triple::x86_64
- ? static_cast<Type *>(VectorType::get(ArgTy, 2))
- : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr));
+ ? 
static_cast<Type *>(VectorType::get(ArgTy, 2)) + : static_cast<Type *>(StructType::get(ArgTy, ArgTy)); } else { Name = "__sincospi_stret"; - ResTy = StructType::get(ArgTy, ArgTy, nullptr); + ResTy = StructType::get(ArgTy, ArgTy); } Module *M = OrigCallee->getParent(); Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), - ResTy, ArgTy, nullptr); + ResTy, ArgTy); if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { // If the argument is an instruction, it must dominate all uses so put our @@ -1508,24 +1548,24 @@ void LibCallSimplifier::classifyArgUse( return; Function *Callee = CI->getCalledFunction(); - LibFunc::Func Func; + LibFunc Func; if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) || !isTrigLibCall(CI)) return; if (IsFloat) { - if (Func == LibFunc::sinpif) + if (Func == LibFunc_sinpif) SinCalls.push_back(CI); - else if (Func == LibFunc::cospif) + else if (Func == LibFunc_cospif) CosCalls.push_back(CI); - else if (Func == LibFunc::sincospif_stret) + else if (Func == LibFunc_sincospif_stret) SinCosCalls.push_back(CI); } else { - if (Func == LibFunc::sinpi) + if (Func == LibFunc_sinpi) SinCalls.push_back(CI); - else if (Func == LibFunc::cospi) + else if (Func == LibFunc_cospi) CosCalls.push_back(CI); - else if (Func == LibFunc::sincospi_stret) + else if (Func == LibFunc_sincospi_stret) SinCosCalls.push_back(CI); } } @@ -1609,14 +1649,14 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, // Proceedings of PACT'98, Oct. 1998, IEEE if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI, StreamArg)) { - CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold); } return nullptr; } static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) + if (!Callee || !Callee->isDeclaration()) return false; if (StreamArg < 0) @@ -1699,7 +1739,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { // printf(format, ...) -> iprintf(format, ...) if no floating point // arguments. - if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) { + if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); Constant *IPrintFFn = M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); @@ -1780,7 +1820,7 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating // point arguments. - if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) { + if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); Constant *SIPrintFFn = M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); @@ -1850,7 +1890,7 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) { // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no // floating point arguments. 
- if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) { + if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) { Module *M = B.GetInsertBlock()->getParent()->getParent(); Constant *FIPrintFFn = M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); @@ -1929,7 +1969,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { } bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { - LibFunc::Func Func; + LibFunc Func; SmallString<20> FloatFuncName = FuncName; FloatFuncName += 'f'; if (TLI->getLibFunc(FloatFuncName, Func)) @@ -1939,7 +1979,7 @@ bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &Builder) { - LibFunc::Func Func; + LibFunc Func; Function *Callee = CI->getCalledFunction(); // Check for string/memory library functions. if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) { @@ -1948,52 +1988,54 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, isCallingConvCCompatible(CI)) && "Optimizing string/memory libcall would change the calling convention"); switch (Func) { - case LibFunc::strcat: + case LibFunc_strcat: return optimizeStrCat(CI, Builder); - case LibFunc::strncat: + case LibFunc_strncat: return optimizeStrNCat(CI, Builder); - case LibFunc::strchr: + case LibFunc_strchr: return optimizeStrChr(CI, Builder); - case LibFunc::strrchr: + case LibFunc_strrchr: return optimizeStrRChr(CI, Builder); - case LibFunc::strcmp: + case LibFunc_strcmp: return optimizeStrCmp(CI, Builder); - case LibFunc::strncmp: + case LibFunc_strncmp: return optimizeStrNCmp(CI, Builder); - case LibFunc::strcpy: + case LibFunc_strcpy: return optimizeStrCpy(CI, Builder); - case LibFunc::stpcpy: + case LibFunc_stpcpy: return optimizeStpCpy(CI, Builder); - case LibFunc::strncpy: + case LibFunc_strncpy: return optimizeStrNCpy(CI, Builder); - case LibFunc::strlen: + case LibFunc_strlen: return optimizeStrLen(CI, Builder); - case LibFunc::strpbrk: + case LibFunc_strpbrk: return optimizeStrPBrk(CI, Builder); - case LibFunc::strtol: - case LibFunc::strtod: - case LibFunc::strtof: - case LibFunc::strtoul: - case LibFunc::strtoll: - case LibFunc::strtold: - case LibFunc::strtoull: + case LibFunc_strtol: + case LibFunc_strtod: + case LibFunc_strtof: + case LibFunc_strtoul: + case LibFunc_strtoll: + case LibFunc_strtold: + case LibFunc_strtoull: return optimizeStrTo(CI, Builder); - case LibFunc::strspn: + case LibFunc_strspn: return optimizeStrSpn(CI, Builder); - case LibFunc::strcspn: + case LibFunc_strcspn: return optimizeStrCSpn(CI, Builder); - case LibFunc::strstr: + case LibFunc_strstr: return optimizeStrStr(CI, Builder); - case LibFunc::memchr: + case LibFunc_memchr: return optimizeMemChr(CI, Builder); - case LibFunc::memcmp: + case LibFunc_memcmp: return optimizeMemCmp(CI, Builder); - case LibFunc::memcpy: + case LibFunc_memcpy: return optimizeMemCpy(CI, Builder); - case LibFunc::memmove: + case LibFunc_memmove: return optimizeMemMove(CI, Builder); - case LibFunc::memset: + case LibFunc_memset: return optimizeMemSet(CI, Builder); + case LibFunc_wcslen: + return optimizeWcslen(CI, Builder); default: break; } @@ -2005,7 +2047,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { if (CI->isNoBuiltin()) return nullptr; - LibFunc::Func Func; + LibFunc Func; Function *Callee = CI->getCalledFunction(); StringRef FuncName = Callee->getName(); @@ -2029,8 +2071,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return 
optimizePow(CI, Builder); case Intrinsic::exp2: return optimizeExp2(CI, Builder); - case Intrinsic::fabs: - return optimizeFabs(CI, Builder); case Intrinsic::log: return optimizeLog(CI, Builder); case Intrinsic::sqrt: @@ -2067,114 +2107,117 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) return V; switch (Func) { - case LibFunc::cosf: - case LibFunc::cos: - case LibFunc::cosl: + case LibFunc_cosf: + case LibFunc_cos: + case LibFunc_cosl: return optimizeCos(CI, Builder); - case LibFunc::sinpif: - case LibFunc::sinpi: - case LibFunc::cospif: - case LibFunc::cospi: + case LibFunc_sinpif: + case LibFunc_sinpi: + case LibFunc_cospif: + case LibFunc_cospi: return optimizeSinCosPi(CI, Builder); - case LibFunc::powf: - case LibFunc::pow: - case LibFunc::powl: + case LibFunc_powf: + case LibFunc_pow: + case LibFunc_powl: return optimizePow(CI, Builder); - case LibFunc::exp2l: - case LibFunc::exp2: - case LibFunc::exp2f: + case LibFunc_exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: return optimizeExp2(CI, Builder); - case LibFunc::fabsf: - case LibFunc::fabs: - case LibFunc::fabsl: - return optimizeFabs(CI, Builder); - case LibFunc::sqrtf: - case LibFunc::sqrt: - case LibFunc::sqrtl: + case LibFunc_fabsf: + case LibFunc_fabs: + case LibFunc_fabsl: + return replaceUnaryCall(CI, Builder, Intrinsic::fabs); + case LibFunc_sqrtf: + case LibFunc_sqrt: + case LibFunc_sqrtl: return optimizeSqrt(CI, Builder); - case LibFunc::ffs: - case LibFunc::ffsl: - case LibFunc::ffsll: + case LibFunc_ffs: + case LibFunc_ffsl: + case LibFunc_ffsll: return optimizeFFS(CI, Builder); - case LibFunc::fls: - case LibFunc::flsl: - case LibFunc::flsll: + case LibFunc_fls: + case LibFunc_flsl: + case LibFunc_flsll: return optimizeFls(CI, Builder); - case LibFunc::abs: - case LibFunc::labs: - case LibFunc::llabs: + case LibFunc_abs: + case LibFunc_labs: + case LibFunc_llabs: return optimizeAbs(CI, Builder); - case LibFunc::isdigit: + case LibFunc_isdigit: return optimizeIsDigit(CI, Builder); - case LibFunc::isascii: + case LibFunc_isascii: return optimizeIsAscii(CI, Builder); - case LibFunc::toascii: + case LibFunc_toascii: return optimizeToAscii(CI, Builder); - case LibFunc::printf: + case LibFunc_printf: return optimizePrintF(CI, Builder); - case LibFunc::sprintf: + case LibFunc_sprintf: return optimizeSPrintF(CI, Builder); - case LibFunc::fprintf: + case LibFunc_fprintf: return optimizeFPrintF(CI, Builder); - case LibFunc::fwrite: + case LibFunc_fwrite: return optimizeFWrite(CI, Builder); - case LibFunc::fputs: + case LibFunc_fputs: return optimizeFPuts(CI, Builder); - case LibFunc::log: - case LibFunc::log10: - case LibFunc::log1p: - case LibFunc::log2: - case LibFunc::logb: + case LibFunc_log: + case LibFunc_log10: + case LibFunc_log1p: + case LibFunc_log2: + case LibFunc_logb: return optimizeLog(CI, Builder); - case LibFunc::puts: + case LibFunc_puts: return optimizePuts(CI, Builder); - case LibFunc::tan: - case LibFunc::tanf: - case LibFunc::tanl: + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: return optimizeTan(CI, Builder); - case LibFunc::perror: + case LibFunc_perror: return optimizeErrorReporting(CI, Builder); - case LibFunc::vfprintf: - case LibFunc::fiprintf: + case LibFunc_vfprintf: + case LibFunc_fiprintf: return optimizeErrorReporting(CI, Builder, 0); - case LibFunc::fputc: + case LibFunc_fputc: return optimizeErrorReporting(CI, Builder, 1); - case LibFunc::ceil: - case LibFunc::floor: - case LibFunc::rint: - case LibFunc::round: 
- case LibFunc::nearbyint: - case LibFunc::trunc: - if (hasFloatVersion(FuncName)) - return optimizeUnaryDoubleFP(CI, Builder, false); - return nullptr; - case LibFunc::acos: - case LibFunc::acosh: - case LibFunc::asin: - case LibFunc::asinh: - case LibFunc::atan: - case LibFunc::atanh: - case LibFunc::cbrt: - case LibFunc::cosh: - case LibFunc::exp: - case LibFunc::exp10: - case LibFunc::expm1: - case LibFunc::sin: - case LibFunc::sinh: - case LibFunc::tanh: + case LibFunc_ceil: + return replaceUnaryCall(CI, Builder, Intrinsic::ceil); + case LibFunc_floor: + return replaceUnaryCall(CI, Builder, Intrinsic::floor); + case LibFunc_round: + return replaceUnaryCall(CI, Builder, Intrinsic::round); + case LibFunc_nearbyint: + return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); + case LibFunc_rint: + return replaceUnaryCall(CI, Builder, Intrinsic::rint); + case LibFunc_trunc: + return replaceUnaryCall(CI, Builder, Intrinsic::trunc); + case LibFunc_acos: + case LibFunc_acosh: + case LibFunc_asin: + case LibFunc_asinh: + case LibFunc_atan: + case LibFunc_atanh: + case LibFunc_cbrt: + case LibFunc_cosh: + case LibFunc_exp: + case LibFunc_exp10: + case LibFunc_expm1: + case LibFunc_sin: + case LibFunc_sinh: + case LibFunc_tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return optimizeUnaryDoubleFP(CI, Builder, true); return nullptr; - case LibFunc::copysign: + case LibFunc_copysign: if (hasFloatVersion(FuncName)) return optimizeBinaryDoubleFP(CI, Builder); return nullptr; - case LibFunc::fminf: - case LibFunc::fmin: - case LibFunc::fminl: - case LibFunc::fmaxf: - case LibFunc::fmax: - case LibFunc::fmaxl: + case LibFunc_fminf: + case LibFunc_fmin: + case LibFunc_fminl: + case LibFunc_fmaxf: + case LibFunc_fmax: + case LibFunc_fmaxl: return optimizeFMinFMax(CI, Builder); default: return nullptr; @@ -2211,16 +2254,10 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // * log(exp10(y)) -> y*log(10) // * log(sqrt(x)) -> 0.5*log(x) // -// lround, lroundf, lroundl: -// * lround(cnst) -> cnst' -// // pow, powf, powl: // * pow(sqrt(x),y) -> pow(x,y*0.5) // * pow(pow(x,y),z)-> pow(x,y*z) // -// round, roundf, roundl: -// * round(cnst) -> cnst' -// // signbit: // * signbit(cnst) -> cnst' // * signbit(nncst) -> 0 (if pstv is a non-negative constant) @@ -2230,10 +2267,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// trunc, truncf, truncl: -// * trunc(cnst) -> cnst' -// -// //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations @@ -2247,7 +2280,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, return true; if (ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { - if (ObjSizeCI->isAllOnesValue()) + if (ObjSizeCI->isMinusOne()) return true; // If the object size wasn't -1 (unknown), bail out if we were asked to. if (OnlyLowerUnknownSize) @@ -2300,7 +2333,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, - LibFunc::Func Func) { + LibFunc Func) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); const DataLayout &DL = CI->getModule()->getDataLayout(); @@ -2308,7 +2341,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, *ObjSize = CI->getArgOperand(2); // __stpcpy_chk(x,x,...) 
-> x+strlen(x) - if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { + if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { Value *StrLen = emitStrLen(Src, B, DL, TLI); return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; } @@ -2334,14 +2367,14 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); // If the function was an __stpcpy_chk, and we were able to fold it into // a __memcpy_chk, we still need to return the correct end pointer. - if (Ret && Func == LibFunc::stpcpy_chk) + if (Ret && Func == LibFunc_stpcpy_chk) return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); return Ret; } Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, - LibFunc::Func Func) { + LibFunc Func) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2366,7 +2399,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { // // PR23093. - LibFunc::Func Func; + LibFunc Func; Function *Callee = CI->getCalledFunction(); SmallVector<OperandBundleDef, 2> OpBundles; @@ -2384,17 +2417,17 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { return nullptr; switch (Func) { - case LibFunc::memcpy_chk: + case LibFunc_memcpy_chk: return optimizeMemCpyChk(CI, Builder); - case LibFunc::memmove_chk: + case LibFunc_memmove_chk: return optimizeMemMoveChk(CI, Builder); - case LibFunc::memset_chk: + case LibFunc_memset_chk: return optimizeMemSetChk(CI, Builder); - case LibFunc::stpcpy_chk: - case LibFunc::strcpy_chk: + case LibFunc_stpcpy_chk: + case LibFunc_strcpy_chk: return optimizeStrpCpyChk(CI, Builder, Func); - case LibFunc::stpncpy_chk: - case LibFunc::strncpy_chk: + case LibFunc_stpncpy_chk: + case LibFunc_strncpy_chk: return optimizeStrpNCpyChk(CI, Builder, Func); default: break; diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index f3d3fad..49dc15c 100644 --- a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index 66dbf33..cd0378e 100644 --- a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index 6d13663..2010755 100644 --- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -59,9 +59,9 @@ #define DEBUG_TYPE "symbol-rewriter" #include "llvm/Transforms/Utils/SymbolRewriter.h" -#include "llvm/Pass.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/LegacyPassManager.h" +#include 
"llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp index 7b9de2e..f6c7d1c 100644 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" #include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; @@ -35,9 +35,8 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeUnifyFunctionExitNodesPass(Registry); initializeInstSimplifierPass(Registry); initializeMetaRenamerPass(Registry); - initializeMemorySSAWrapperPassPass(Registry); - initializeMemorySSAPrinterLegacyPassPass(Registry); initializeStripGCRelocatesPass(Registry); + initializePredicateInfoPrinterLegacyPassPass(Registry); } /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. diff --git a/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp new file mode 100644 index 0000000..c3feea6 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -0,0 +1,495 @@ +#include "llvm/Transforms/Utils/VNCoercion.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "vncoerce" +namespace llvm { +namespace VNCoercion { + +/// Return true if coerceAvailableValueToLoadType will succeed. +bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, + const DataLayout &DL) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (LoadTy->isStructTy() || LoadTy->isArrayTy() || + StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) + return false; + + // The store has to be at least as big as the load. + if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) + return false; + + // Don't coerce non-integral pointers to integers or vice versa. + if (DL.isNonIntegralPointerType(StoredVal->getType()) != + DL.isNonIntegralPointerType(LoadTy)) + return false; + + return true; +} + +template <class T, class HelperClass> +static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, + HelperClass &Helper, + const DataLayout &DL) { + assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && + "precondition violation - materialization can't fail"); + if (auto *C = dyn_cast<Constant>(StoredVal)) + if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) + StoredVal = FoldedStoredVal; + + // If this is already the right type, just return it. + Type *StoredValTy = StoredVal->getType(); + + uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy); + uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy); + + // If the store and reload are the same size, we can always reuse it. + if (StoredValSize == LoadedValSize) { + // Pointer to Pointer -> use bitcast. 
+ if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) { + StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); + } else { + // Convert source pointers to integers, which can be bitcast. + if (StoredValTy->isPtrOrPtrVectorTy()) { + StoredValTy = DL.getIntPtrType(StoredValTy); + StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); + } + + Type *TypeToCastTo = LoadedTy; + if (TypeToCastTo->isPtrOrPtrVectorTy()) + TypeToCastTo = DL.getIntPtrType(TypeToCastTo); + + if (StoredValTy != TypeToCastTo) + StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); + + // Cast to pointer if the load needs a pointer type. + if (LoadedTy->isPtrOrPtrVectorTy()) + StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); + } + + if (auto *C = dyn_cast<ConstantExpr>(StoredVal)) + if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) + StoredVal = FoldedStoredVal; + + return StoredVal; + } + // If the loaded value is smaller than the available value, then we can + // extract out a piece from it. If the available value is too small, then we + // can't do anything. + assert(StoredValSize >= LoadedValSize && + "canCoerceMustAliasedValueToLoad fail"); + + // Convert source pointers to integers, which can be manipulated. + if (StoredValTy->isPtrOrPtrVectorTy()) { + StoredValTy = DL.getIntPtrType(StoredValTy); + StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); + } + + // Convert vectors and fp to integer, which can be manipulated. + if (!StoredValTy->isIntegerTy()) { + StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize); + StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy); + } + + // If this is a big-endian system, we need to shift the value down to the low + // bits so that a truncate will work. + if (DL.isBigEndian()) { + uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) - + DL.getTypeStoreSizeInBits(LoadedTy); + StoredVal = Helper.CreateLShr( + StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt)); + } + + // Truncate the integer to the right size now. + Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize); + StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy); + + if (LoadedTy != NewIntTy) { + // If the result is a pointer, inttoptr. + if (LoadedTy->isPtrOrPtrVectorTy()) + StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); + else + // Otherwise, bitcast. + StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); + } + + if (auto *C = dyn_cast<Constant>(StoredVal)) + if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL)) + StoredVal = FoldedStoredVal; + + return StoredVal; +} + +/// If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace. +/// IRB is IRBuilder used to insert new instructions. +/// +/// If we can't do it, return null. +Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, + IRBuilder<> &IRB, const DataLayout &DL) { + return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL); +} + +/// This function is called when we have a memdep query of a load that ends up +/// being a clobbering memory write (store, memset, memcpy, memmove). This +/// means that the write *may* provide bits used by the load but we can't be +/// sure because the pointers don't must-alias. +/// +/// Check this case to see if there is anything more we can do before we give +/// up. 
This returns -1 if we have to give up, or a byte number in the stored +/// value of the piece that feeds the load. +static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, + Value *WritePtr, + uint64_t WriteSizeInBits, + const DataLayout &DL) { + // If the loaded or stored value is a first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (LoadTy->isStructTy() || LoadTy->isArrayTy()) + return -1; + + int64_t StoreOffset = 0, LoadOffset = 0; + Value *StoreBase = + GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); + if (StoreBase != LoadBase) + return -1; + + // If the load and store are to the exact same address, they should have been + // a must alias. AA must have gotten confused. + // FIXME: Study to see if/when this happens. One case is forwarding a memset + // to a load from the base of the memset. + + // If the load and store don't overlap at all, the store doesn't provide + // anything to the load. In this case, they really don't alias at all, AA + // must have gotten confused. + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy); + + if ((WriteSizeInBits & 7) | (LoadSize & 7)) + return -1; + uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes. + LoadSize /= 8; + + bool isAAFailure = false; + if (StoreOffset < LoadOffset) + isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset; + else + isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset; + + if (isAAFailure) + return -1; + + // If the Load isn't completely contained within the stored bits, we don't + // have all the bits to feed it. We could do something crazy in the future + // (issue a smaller load, then merge the bits in) but this seems unlikely to be + // valuable. + if (StoreOffset > LoadOffset || + StoreOffset + StoreSize < LoadOffset + LoadSize) + return -1; + + // Okay, we can do this transformation. Return the number of bytes into the + // store that the load is. + return LoadOffset - StoreOffset; +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. +int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI, const DataLayout &DL) { + // Cannot handle reading from store of first-class aggregate yet. + if (DepSI->getValueOperand()->getType()->isStructTy() || + DepSI->getValueOperand()->getType()->isArrayTy()) + return -1; + + Value *StorePtr = DepSI->getPointerOperand(); + uint64_t StoreSize = + DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); + return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize, + DL); +} + +/// This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI, + const DataLayout &DL) { + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()); + int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); + if (R != -1) + return R; + + // If we have a load/load clobber and DepLI can be widened to cover this load, + // then we should widen it! 
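
The offset arithmetic in analyzeLoadFromClobberingWrite above can be mirrored with ordinary integers. A hedged sketch (the helper name is invented; byte units; both offsets are assumed relative to the same base pointer):

    #include <cstdint>

    // Returns the byte offset of the load within the stored bytes, or -1.
    int64_t loadOffsetInStore(int64_t StoreOff, int64_t StoreBytes,
                              int64_t LoadOff, int64_t LoadBytes) {
      // Disjoint ranges: AA was simply confused; the store feeds nothing.
      if (StoreOff + StoreBytes <= LoadOff || LoadOff + LoadBytes <= StoreOff)
        return -1;
      // The load must be fully contained within the stored bytes.
      if (StoreOff > LoadOff || StoreOff + StoreBytes < LoadOff + LoadBytes)
        return -1;
      return LoadOff - StoreOff;
    }

For example, loadOffsetInStore(0, 8, 4, 2) returns 4: a 2-byte load four bytes into an 8-byte store.
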
+ int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize( + LoadBase, LoadOffs, LoadSize, DepLI); + if (Size == 0) + return -1; + + // Check non-obvious conditions enforced by MDA which we rely on for being + // able to materialize this potentially available value. + assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!"); + assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load"); + + return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL); +} + +int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, + MemIntrinsic *MI, const DataLayout &DL) { + // If the mem operation is a non-constant size, we can't handle it. + ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); + if (!SizeCst) + return -1; + uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8; + + // If this is memset, we just need to see if the offset is valid in the size + // of the memset. + if (MI->getIntrinsicID() == Intrinsic::memset) + return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, DL); + + // If we have a memcpy/memmove, the only case we can handle is if this is a + // copy from constant memory. In that case, we can read directly from the + // constant memory. + MemTransferInst *MTI = cast<MemTransferInst>(MI); + + Constant *Src = dyn_cast<Constant>(MTI->getSource()); + if (!Src) + return -1; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); + if (!GV || !GV->isConstant()) + return -1; + + // See if the access is within the bounds of the transfer. + int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, DL); + if (Offset == -1) + return Offset; + + unsigned AS = Src->getType()->getPointerAddressSpace(); + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = + ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); + if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL)) + return Offset; + return -1; +} + +template <class T, class HelperClass> +static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, + HelperClass &Helper, + const DataLayout &DL) { + LLVMContext &Ctx = SrcVal->getType()->getContext(); + + // If two pointers are in the same address space, they have the same size, + // so we don't need to do any truncation, etc. This avoids introducing + // ptrtoint instructions for pointers that may be non-integral. + if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() && + cast<PointerType>(SrcVal->getType())->getAddressSpace() == + cast<PointerType>(LoadTy)->getAddressSpace()) { + return SrcVal; + } + + uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; + uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; + // Compute which bits of the stored value are being used by the load. Convert + // to an integer type to start with. 
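
// Editor's illustration for the endianness handling that follows, with
// hypothetical values: a 4-byte store of 0xAABBCCDD feeding a 1-byte load
// at Offset 1.
//   little-endian: ShiftAmt = 1 * 8 = 8;        0xAABBCCDD >> 8,  trunc -> 0xCC
//   big-endian:    ShiftAmt = (4 - 1 - 1) * 8;  0xAABBCCDD >> 16, trunc -> 0xBB
// Both results are the byte that sits at address P + 1 in memory, as required.
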
+ if (SrcVal->getType()->isPtrOrPtrVectorTy()) + SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); + if (!SrcVal->getType()->isIntegerTy()) + SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); + + // Shift the bits to the least significant depending on endianness. + unsigned ShiftAmt; + if (DL.isLittleEndian()) + ShiftAmt = Offset * 8; + else + ShiftAmt = (StoreSize - LoadSize - Offset) * 8; + if (ShiftAmt) + SrcVal = Helper.CreateLShr(SrcVal, + ConstantInt::get(SrcVal->getType(), ShiftAmt)); + + if (LoadSize != StoreSize) + SrcVal = Helper.CreateTruncOrBitCast(SrcVal, + IntegerType::get(Ctx, LoadSize * 8)); + return SrcVal; +} + +/// This function is called when we have a memdep query of a load that ends up +/// being a clobbering store. This means that the store provides bits used by +/// the load but the pointers don't must-alias. Check this case to see if +/// there is anything more we can do before we give up. +Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt, const DataLayout &DL) { + + IRBuilder<> Builder(InsertPt); + SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL); + return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL); +} + +Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset, + Type *LoadTy, const DataLayout &DL) { + ConstantFolder F; + SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL); + return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL); +} + +/// This function is called when we have a memdep query of a load that ends up +/// being a clobbering load. This means that the clobbering load *may* provide +/// bits used by the queried load but we can't be sure because the pointers +/// don't must-alias. +/// Check this case to see if there is anything more we can do before we give +/// up. +Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, + Instruction *InsertPt, const DataLayout &DL) { + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. + unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy); + if (Offset + LoadSize > SrcValStoreSize) { + assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); + assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); + // If we have a load/load clobber and DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset + LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. 
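
// Editor's illustration of the rounding above, with hypothetical numbers:
// Offset = 3 and a 2-byte LoadTy give NewLoadSize = 5, which NextPowerOf2
// rounds up to 8, so the widened load emitted below is an i64 that covers
// both the original access and the queried one.
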
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8); + DestPTy = + PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace()); + Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (DL.isBigEndian()) + RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + SrcVal = NewLoad; + } + + return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); +} + +Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset, + Type *LoadTy, const DataLayout &DL) { + unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = DL.getTypeStoreSize(LoadTy); + if (Offset + LoadSize > SrcValStoreSize) + return nullptr; + return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL); +} + +template <class T, class HelperClass> +T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, HelperClass &Helper, + const DataLayout &DL) { + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8; + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { + // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and + // independently of what the offset is. + T *Val = cast<T>(MSI->getValue()); + if (LoadSize != 1) + Val = + Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8)); + T *OneElt = Val; + + // Splat the value out to the right number of bits. + for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) { + // If we can double the number of bytes set, do it. + if (NumBytesSet * 2 <= LoadSize) { + T *ShVal = Helper.CreateShl( + Val, ConstantInt::get(Val->getType(), NumBytesSet * 8)); + Val = Helper.CreateOr(Val, ShVal); + NumBytesSet <<= 1; + continue; + } + + // Otherwise insert one byte at a time. + T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8)); + Val = Helper.CreateOr(OneElt, ShVal); + ++NumBytesSet; + } + + return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL); + } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); + Constant *Src = cast<Constant>(MTI->getSource()); + unsigned AS = Src->getType()->getPointerAddressSpace(); + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. 
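
Stepping back to the memset case above: the doubling loop can be mirrored with ordinary integers. A sketch under the assumption that LoadSize is at most 8 bytes (the helper name is invented for illustration):

    #include <cstdint>

    // Splat one memset byte across LoadSize bytes: double the splatted
    // prefix while it still fits, then append single bytes, as in the
    // loop above.
    uint64_t splatByte(uint8_t Byte, unsigned LoadSize) {
      uint64_t Val = Byte;
      for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
        if (NumBytesSet * 2 <= LoadSize) {
          Val |= Val << (NumBytesSet * 8); // double the bytes set so far
          NumBytesSet <<= 1;
          continue;
        }
        Val = (Val << 8) | Byte;           // otherwise add one byte
        ++NumBytesSet;
      }
      return Val; // splatByte(0xAB, 4) == 0xABABABAB
    }
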
+ Src = + ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS)); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); + return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL); +} + +/// This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, Instruction *InsertPt, + const DataLayout &DL) { + IRBuilder<> Builder(InsertPt); + return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset, + LoadTy, Builder, DL); +} + +Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + Type *LoadTy, const DataLayout &DL) { + // The only case analyzeLoadFromClobberingMemInst cannot be converted to a + // constant is when it's a memset of a non-constant. + if (auto *MSI = dyn_cast<MemSetInst>(SrcInst)) + if (!isa<Constant>(MSI->getValue())) + return nullptr; + ConstantFolder F; + return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset, + LoadTy, F, DL); +} +} // namespace VNCoercion +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 0e9baaf..9309729 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -121,6 +121,8 @@ public: void addFlags(RemapFlags Flags); + void remapGlobalObjectMetadata(GlobalObject &GO); + Value *mapValue(const Value *V); void remapInstruction(Instruction *I); void remapFunction(Function &F); @@ -681,6 +683,7 @@ void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) { remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) { if (Optional<Metadata *> MappedOp = getMappedOp(Old)) return *MappedOp; + (void)D; assert(G.Info[Old].ID > D.ID && "Expected a forward reference"); return &G.getFwdReference(*cast<MDNode>(Old)); }); @@ -801,6 +804,7 @@ void Mapper::flush() { switch (E.Kind) { case WorklistEntry::MapGlobalInit: E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init)); + remapGlobalObjectMetadata(*E.Data.GVInit.GV); break; case WorklistEntry::MapAppendingVar: { unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; @@ -891,6 +895,14 @@ void Mapper::remapInstruction(Instruction *I) { I->mutateType(TypeMapper->remapType(I->getType())); } +void Mapper::remapGlobalObjectMetadata(GlobalObject &GO) { + SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; + GO.getAllMetadata(MDs); + GO.clearMetadata(); + for (const auto &I : MDs) + GO.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second))); +} + void Mapper::remapFunction(Function &F) { // Remap the operands. for (Use &Op : F.operands()) @@ -898,11 +910,7 @@ void Mapper::remapFunction(Function &F) { Op = mapValue(Op); // Remap the metadata attachments. - SmallVector<std::pair<unsigned, MDNode *>, 8> MDs; - F.getAllMetadata(MDs); - F.clearMetadata(); - for (const auto &I : MDs) - F.addMetadata(I.first, *cast<MDNode>(mapMetadata(I.second))); + remapGlobalObjectMetadata(F); // Remap the argument types. 
if (TypeMapper) @@ -941,11 +949,10 @@ void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, Constant *NewV; if (IsOldCtorDtor) { auto *S = cast<ConstantStruct>(V); - auto *E1 = mapValue(S->getOperand(0)); - auto *E2 = mapValue(S->getOperand(1)); - Value *Null = Constant::getNullValue(VoidPtrTy); - NewV = - ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr); + auto *E1 = cast<Constant>(mapValue(S->getOperand(0))); + auto *E2 = cast<Constant>(mapValue(S->getOperand(1))); + Constant *Null = Constant::getNullValue(VoidPtrTy); + NewV = ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null); } else { NewV = cast_or_null<Constant>(mapValue(V)); } |
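
For reference, the ConstantStruct::get change in this last hunk (and the StructType::get change in the earlier sincos hunk) tracks the same upstream API cleanup: the sentinel-terminated varargs overloads were removed in favor of explicit element lists. In outline:

    // Before: a trailing nullptr marked the end of the element list.
    //   StructType::get(ArgTy, ArgTy, nullptr);
    //   ConstantStruct::get(STy, E1, E2, Null, nullptr);
    // After: the element list is explicit; no sentinel.
    //   StructType::get(ArgTy, ArgTy);
    //   ConstantStruct::get(STy, E1, E2, Null);
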