Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 148
1 file changed, 97 insertions, 51 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e623054..31ee920 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -14,16 +14,32 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
-
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/InstVisitor.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <iterator>
 
 #define DEBUG_TYPE "amdgpu-codegenprepare"
 
@@ -33,18 +49,15 @@ namespace {
 
 class AMDGPUCodeGenPrepare : public FunctionPass,
                              public InstVisitor<AMDGPUCodeGenPrepare, bool> {
-  const GCNTargetMachine *TM;
-  const SISubtarget *ST;
-  DivergenceAnalysis *DA;
-  Module *Mod;
-  bool HasUnsafeFPMath;
+  const SISubtarget *ST = nullptr;
+  DivergenceAnalysis *DA = nullptr;
+  Module *Mod = nullptr;
+  bool HasUnsafeFPMath = false;
 
   /// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
   /// binary operation \p V.
   ///
   /// \returns Binary operation \p V.
-  Value *copyFlags(const BinaryOperator &I, Value *V) const;
-
   /// \returns \p T's base element bit width.
   unsigned getBaseElementBitWidth(const Type *T) const;
 
@@ -113,13 +126,8 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
 
 public:
   static char ID;
-  AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
-    FunctionPass(ID),
-    TM(static_cast<const GCNTargetMachine *>(TM)),
-    ST(nullptr),
-    DA(nullptr),
-    Mod(nullptr),
-    HasUnsafeFPMath(false) { }
+
+  AMDGPUCodeGenPrepare() : FunctionPass(ID) {}
 
   bool visitFDiv(BinaryOperator &I);
 
@@ -142,22 +150,7 @@ public:
   }
 };
 
-} // End anonymous namespace
-
-Value *AMDGPUCodeGenPrepare::copyFlags(
-    const BinaryOperator &I, Value *V) const {
-  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
-  if (!BinOp) // Possibly constant expression.
-    return V;
-
-  if (isa<OverflowingBinaryOperator>(BinOp)) {
-    BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
-    BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
-  } else if (isa<PossiblyExactOperator>(BinOp))
-    BinOp->setIsExact(I.isExact());
-
-  return V;
-}
+} // end anonymous namespace
 
 unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
   assert(needsPromotionToI32(T) && "T does not need promotion to i32");
@@ -186,12 +179,48 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
 }
 
 bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
-  if (T->isIntegerTy() && T->getIntegerBitWidth() > 1 &&
-      T->getIntegerBitWidth() <= 16)
+  const IntegerType *IntTy = dyn_cast<IntegerType>(T);
+  if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
+    return true;
+
+  if (const VectorType *VT = dyn_cast<VectorType>(T)) {
+    // TODO: The set of packed operations is more limited, so may want to
+    // promote some anyway.
+    if (ST->hasVOP3PInsts())
+      return false;
+
+    return needsPromotionToI32(VT->getElementType());
+  }
+
+  return false;
+}
+
+// Return true if the op promoted to i32 should have nsw set.
+static bool promotedOpIsNSW(const Instruction &I) {
+  switch (I.getOpcode()) {
+  case Instruction::Shl:
+  case Instruction::Add:
+  case Instruction::Sub:
     return true;
-  if (!T->isVectorTy())
+  case Instruction::Mul:
+    return I.hasNoUnsignedWrap();
+  default:
+    return false;
+  }
+}
+
+// Return true if the op promoted to i32 should have nuw set.
+static bool promotedOpIsNUW(const Instruction &I) {
+  switch (I.getOpcode()) {
+  case Instruction::Shl:
+  case Instruction::Add:
+  case Instruction::Mul:
+    return true;
+  case Instruction::Sub:
+    return I.hasNoUnsignedWrap();
+  default:
     return false;
-  return needsPromotionToI32(cast<VectorType>(T)->getElementType());
+  }
 }
 
 bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -218,7 +247,19 @@ bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
     ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
     ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
   }
-  ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+
+  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
+  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
+    if (promotedOpIsNSW(cast<Instruction>(I)))
+      Inst->setHasNoSignedWrap();
+
+    if (promotedOpIsNUW(cast<Instruction>(I)))
+      Inst->setHasNoUnsignedWrap();
+
+    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+      Inst->setIsExact(ExactOp->isExact());
+  }
+
   TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
 
   I.replaceAllUsesWith(TruncRes);
@@ -339,16 +380,16 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
   FastMathFlags FMF = FPOp->getFastMathFlags();
   bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
                    FMF.allowReciprocal();
-  if (ST->hasFP32Denormals() && !UnsafeDiv)
+
+  // With UnsafeDiv node will be optimized to just rcp and mul.
+  if (ST->hasFP32Denormals() || UnsafeDiv)
     return false;
 
   IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
   Builder.setFastMathFlags(FMF);
   Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
 
-  const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
-  Function *Decl
-    = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
+  Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
 
   Value *Num = FDiv.getOperand(0);
   Value *Den = FDiv.getOperand(1);
@@ -447,10 +488,15 @@ bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
 }
 
 bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
-  if (!TM || skipFunction(F))
+  if (skipFunction(F))
+    return false;
+
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+  if (!TPC)
     return false;
 
-  ST = &TM->getSubtarget<SISubtarget>(F);
+  const TargetMachine &TM = TPC->getTM<TargetMachine>();
+  ST = &TM.getSubtarget<SISubtarget>(F);
   DA = &getAnalysis<DivergenceAnalysis>();
   HasUnsafeFPMath = hasUnsafeFPMath(F);
 
@@ -467,14 +513,14 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
   return MadeChange;
 }
 
-INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                       "AMDGPU IR optimizations", false, false)
 INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
-INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
-                       "AMDGPU IR optimizations", false, false)
+INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
+                    false, false)
 
 char AMDGPUCodeGenPrepare::ID = 0;
 
-FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
-  return new AMDGPUCodeGenPrepare(TM);
+FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
+  return new AMDGPUCodeGenPrepare();
 }
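For orientation, a minimal sketch of the i16-to-i32 promotion this change affects (illustrative LLVM IR, not taken from the commit; value names are hypothetical). For a uniform signed add on i16, promoteUniformOpToI32 widens both operands, re-derives the wrap flags from promotedOpIsNSW/promotedOpIsNUW (both hold unconditionally for add) rather than copying them from the original instruction as the removed copyFlags did, and truncates the result back:

    ; before
    %r = add i16 %a, %b

    ; after (sext vs. zext is chosen by isSigned; the trunc replaces all uses of %r)
    %ext0 = sext i16 %a to i32
    %ext1 = sext i16 %b to i32
    %wide = add nsw nuw i32 %ext0, %ext1
    %res  = trunc i32 %wide to i16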