summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp')
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp148
1 files changed, 97 insertions, 51 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e623054..31ee920 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -14,16 +14,32 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/InstVisitor.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <iterator>
#define DEBUG_TYPE "amdgpu-codegenprepare"
@@ -33,18 +49,15 @@ namespace {
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
- const GCNTargetMachine *TM;
- const SISubtarget *ST;
- DivergenceAnalysis *DA;
- Module *Mod;
- bool HasUnsafeFPMath;
+ const SISubtarget *ST = nullptr;
+ DivergenceAnalysis *DA = nullptr;
+ Module *Mod = nullptr;
+ bool HasUnsafeFPMath = false;
/// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
///
/// \returns Binary operation \p V.
- Value *copyFlags(const BinaryOperator &I, Value *V) const;
-
/// \returns \p T's base element bit width.
unsigned getBaseElementBitWidth(const Type *T) const;
@@ -113,13 +126,8 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public:
static char ID;
- AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
- FunctionPass(ID),
- TM(static_cast<const GCNTargetMachine *>(TM)),
- ST(nullptr),
- DA(nullptr),
- Mod(nullptr),
- HasUnsafeFPMath(false) { }
+
+ AMDGPUCodeGenPrepare() : FunctionPass(ID) {}
bool visitFDiv(BinaryOperator &I);
@@ -142,22 +150,7 @@ public:
}
};
-} // End anonymous namespace
-
-Value *AMDGPUCodeGenPrepare::copyFlags(
- const BinaryOperator &I, Value *V) const {
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
- if (!BinOp) // Possibly constant expression.
- return V;
-
- if (isa<OverflowingBinaryOperator>(BinOp)) {
- BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
- BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
- } else if (isa<PossiblyExactOperator>(BinOp))
- BinOp->setIsExact(I.isExact());
-
- return V;
-}
+} // end anonymous namespace
unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
assert(needsPromotionToI32(T) && "T does not need promotion to i32");
@@ -186,12 +179,48 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
}
bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
- if (T->isIntegerTy() && T->getIntegerBitWidth() > 1 &&
- T->getIntegerBitWidth() <= 16)
+ const IntegerType *IntTy = dyn_cast<IntegerType>(T);
+ if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
+ return true;
+
+ if (const VectorType *VT = dyn_cast<VectorType>(T)) {
+ // TODO: The set of packed operations is more limited, so may want to
+ // promote some anyway.
+ if (ST->hasVOP3PInsts())
+ return false;
+
+ return needsPromotionToI32(VT->getElementType());
+ }
+
+ return false;
+}
+
+// Return true if the op promoted to i32 should have nsw set.
+static bool promotedOpIsNSW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Sub:
return true;
- if (!T->isVectorTy())
+ case Instruction::Mul:
+ return I.hasNoUnsignedWrap();
+ default:
+ return false;
+ }
+}
+
+// Return true if the op promoted to i32 should have nuw set.
+static bool promotedOpIsNUW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Mul:
+ return true;
+ case Instruction::Sub:
+ return I.hasNoUnsignedWrap();
+ default:
return false;
- return needsPromotionToI32(cast<VectorType>(T)->getElementType());
+ }
}
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -218,7 +247,19 @@ bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
}
- ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+
+ ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
+ if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
+ if (promotedOpIsNSW(cast<Instruction>(I)))
+ Inst->setHasNoSignedWrap();
+
+ if (promotedOpIsNUW(cast<Instruction>(I)))
+ Inst->setHasNoUnsignedWrap();
+
+ if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+ Inst->setIsExact(ExactOp->isExact());
+ }
+
TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
I.replaceAllUsesWith(TruncRes);
@@ -339,16 +380,16 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
FastMathFlags FMF = FPOp->getFastMathFlags();
bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
FMF.allowReciprocal();
- if (ST->hasFP32Denormals() && !UnsafeDiv)
+
+ // With UnsafeDiv node will be optimized to just rcp and mul.
+ if (ST->hasFP32Denormals() || UnsafeDiv)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
Builder.setFastMathFlags(FMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
- const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
- Function *Decl
- = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
+ Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
@@ -447,10 +488,15 @@ bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
}
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
- if (!TM || skipFunction(F))
+ if (skipFunction(F))
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
return false;
- ST = &TM->getSubtarget<SISubtarget>(F);
+ const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ ST = &TM.getSubtarget<SISubtarget>(F);
DA = &getAnalysis<DivergenceAnalysis>();
HasUnsafeFPMath = hasUnsafeFPMath(F);
@@ -467,14 +513,14 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return MadeChange;
}
-INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
-INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
- "AMDGPU IR optimizations", false, false)
+INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
+ false, false)
char AMDGPUCodeGenPrepare::ID = 0;
-FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
- return new AMDGPUCodeGenPrepare(TM);
+FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
+ return new AMDGPUCodeGenPrepare();
}
OpenPOWER on IntegriCloud