summaryrefslogtreecommitdiffstats
path: root/lib/Target/PTX/PTXFPRoundingModePass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PTX/PTXFPRoundingModePass.cpp')
-rw-r--r--lib/Target/PTX/PTXFPRoundingModePass.cpp179
1 files changed, 179 insertions, 0 deletions
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
new file mode 100644
index 0000000..0b653e0
--- /dev/null
+++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -0,0 +1,179 @@
+//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a machine function pass that sets appropriate FP rounding
+// modes for all relevant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-fp-rounding-mode"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXFPRoundingModePass should be executed just before emission.
+
+namespace llvm {
+ /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
+ /// all FP instructions. Essentially, this pass just looks for all FP
+ /// instructions that have a rounding mode set to RndDefault, and sets an
+ /// appropriate rounding mode based on the target device.
+ ///
+ /// The per-opcode defaults are computed once, in the constructor, by
+ /// initializeMap(). runOnMachineFunction() then walks every instruction of
+ /// the function and hands each one to processInstruction().
+ ///
+ class PTXFPRoundingModePass : public MachineFunctionPass {
+ private:
+ // Pass ID object; the constructor forwards it to MachineFunctionPass.
+ static char ID;
+
+ // Rounding-mode descriptor: first is the index of the operand that holds
+ // the rounding mode, second is the default mode to store in that operand.
+ typedef std::pair<unsigned, unsigned> RndModeDesc;
+
+ // Target machine whose PTXSubtarget is queried while building the map.
+ PTXTargetMachine& TargetMachine;
+ // Maps an instruction opcode to its rounding-mode descriptor. Opcodes that
+ // are absent from the map are ignored by processInstruction().
+ DenseMap<unsigned, RndModeDesc> Instrs;
+
+ public:
+ /// Creates the pass for \p TM and populates the opcode map immediately.
+ /// \p OptLevel is accepted but not used by this constructor.
+ PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID),
+ TargetMachine(TM) {
+ initializeMap();
+ }
+
+ /// Applies processInstruction() to every instruction in \p MF.
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ /// Returns the human-readable name of this pass.
+ virtual const char *getPassName() const {
+ return "PTX FP Rounding Mode Pass";
+ }
+
+ private:
+
+ /// Fills Instrs with the default rounding mode for each handled opcode.
+ void initializeMap();
+ /// Replaces a RndDefault rounding-mode operand of \p MI with the default
+ /// recorded in Instrs; does nothing for opcodes that are not in the map.
+ void processInstruction(MachineInstr &MI);
+ }; // class PTXFPRoundingModePass
+} // namespace llvm
+
+using namespace llvm;
+
+// Definition of the static pass ID declared in the class; the constructor
+// passes this object to the MachineFunctionPass base class.
+char PTXFPRoundingModePass::ID = 0;
+
+/// Visits every machine instruction of \p MF, one basic block at a time, and
+/// passes each instruction to processInstruction().
+///
+/// \returns false unconditionally.
+// NOTE(review): processInstruction() may rewrite an immediate operand, yet
+// this function always reports "not modified" -- confirm that is intended.
+bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
+  MachineFunction::iterator BlockIt = MF.begin();
+  MachineFunction::iterator BlockEnd = MF.end();
+  while (BlockIt != BlockEnd) {
+    MachineBasicBlock &Block = *BlockIt;
+    MachineBasicBlock::iterator InstrIt = Block.begin();
+    MachineBasicBlock::iterator InstrEnd = Block.end();
+    for (; InstrIt != InstrEnd; ++InstrIt)
+      processInstruction(*InstrIt);
+    ++BlockIt;
+  }
+  return false;
+}
+
+/// Records \p Mode as the default rounding mode for every opcode listed in
+/// \p Opcodes. The descriptor's operand index is always 1.
+template <unsigned N>
+static void setDefaultRounding(
+    DenseMap<unsigned, std::pair<unsigned, unsigned> > &Table,
+    const unsigned (&Opcodes)[N], unsigned Mode) {
+  for (unsigned Idx = 0; Idx != N; ++Idx)
+    Table[Opcodes[Idx]] = std::pair<unsigned, unsigned>(1U, Mode);
+}
+
+/// Populates Instrs with one (operand index, default rounding mode) entry per
+/// opcode that carries a rounding-mode operand. The defaults for the FDIV* and
+/// FMAD* opcodes depend on what the PTXSubtarget reports; all other defaults
+/// are fixed.
+void PTXFPRoundingModePass::initializeMap() {
+  using namespace PTXRoundingMode;
+  const PTXSubtarget &Subtarget = TargetMachine.getSubtarget<PTXSubtarget>();
+
+  // FADD* / FSUB* / FMUL*: round to nearest even.
+  static const unsigned AddSubMulOps[] = {
+    PTX::FADDrr32, PTX::FADDri32, PTX::FADDrr64, PTX::FADDri64,
+    PTX::FSUBrr32, PTX::FSUBri32, PTX::FSUBrr64, PTX::FSUBri64,
+    PTX::FMULrr32, PTX::FMULri32, PTX::FMULrr64, PTX::FMULri64
+  };
+  setDefaultRounding(Instrs, AddSubMulOps, RndNearestEven);
+
+  // FNEG*: no rounding mode.
+  static const unsigned NegOps[] = {
+    PTX::FNEGrr32, PTX::FNEGri32, PTX::FNEGrr64, PTX::FNEGri64
+  };
+  setDefaultRounding(Instrs, NegOps, RndNone);
+
+  // FDIV*: nearest even only if the subtarget says a mode is needed.
+  static const unsigned DivOps[] = {
+    PTX::FDIVrr32, PTX::FDIVri32, PTX::FDIVrr64, PTX::FDIVri64
+  };
+  unsigned DivMode = RndNone;
+  if (Subtarget.fdivNeedsRoundingMode())
+    DivMode = RndNearestEven;
+  setDefaultRounding(Instrs, DivOps, DivMode);
+
+  // FMAD*: same subtarget-dependent choice as FDIV*, via its own query.
+  static const unsigned MadOps[] = {
+    PTX::FMADrrr32, PTX::FMADrri32, PTX::FMADrii32,
+    PTX::FMADrrr64, PTX::FMADrri64, PTX::FMADrii64
+  };
+  unsigned MadMode = RndNone;
+  if (Subtarget.fmadNeedsRoundingMode())
+    MadMode = RndNearestEven;
+  setDefaultRounding(Instrs, MadOps, MadMode);
+
+  // FSQRT*: round to nearest even.
+  static const unsigned SqrtOps[] = {
+    PTX::FSQRTrr32, PTX::FSQRTri32, PTX::FSQRTrr64, PTX::FSQRTri64
+  };
+  setDefaultRounding(Instrs, SqrtOps, RndNearestEven);
+
+  // FSIN* / FCOS*: approximate.
+  static const unsigned SinCosOps[] = {
+    PTX::FSINrr32, PTX::FSINri32, PTX::FSINrr64, PTX::FSINri64,
+    PTX::FCOSrr32, PTX::FCOSri32, PTX::FCOSrr64, PTX::FCOSri64
+  };
+  setDefaultRounding(Instrs, SinCosOps, RndApprox);
+
+  // CVT{u,s}NN from f32/f64: RndTowardsZeroInt.
+  static const unsigned CvtToIntOps[] = {
+    PTX::CVTu16f32, PTX::CVTs16f32, PTX::CVTu16f64, PTX::CVTs16f64,
+    PTX::CVTu32f32, PTX::CVTs32f32, PTX::CVTu32f64, PTX::CVTs32f64,
+    PTX::CVTu64f32, PTX::CVTs64f32, PTX::CVTu64f64, PTX::CVTs64f64
+  };
+  setDefaultRounding(Instrs, CvtToIntOps, RndTowardsZeroInt);
+
+  // CVTf32* / CVTf64*: round to nearest even.
+  static const unsigned CvtToFloatOps[] = {
+    PTX::CVTf32u16, PTX::CVTf32s16, PTX::CVTf32u32, PTX::CVTf32s32,
+    PTX::CVTf32u64, PTX::CVTf32s64, PTX::CVTf32f64,
+    PTX::CVTf64u16, PTX::CVTf64s16, PTX::CVTf64u32, PTX::CVTf64s32,
+    PTX::CVTf64u64, PTX::CVTf64s64
+  };
+  setDefaultRounding(Instrs, CvtToFloatOps, RndNearestEven);
+}
+
+/// Gives \p MI its default rounding mode if none has been chosen yet.
+///
+/// Instructions whose opcode is not in Instrs are left untouched. For the
+/// others, the operand at the descriptor's index is read as an immediate and,
+/// if it equals PTXRoundingMode::RndDefault, overwritten with the descriptor's
+/// default mode. Any other value is kept as is.
+void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
+  // One find() serves as both the membership test and the descriptor fetch,
+  // so the opcode is hashed once and the mutating operator[] is avoided.
+  DenseMap<unsigned, RndModeDesc>::iterator Entry =
+      Instrs.find(MI.getOpcode());
+  if (Entry == Instrs.end())
+    return; // Not an instruction that needs a rounding mode.
+
+  const RndModeDesc &Desc = Entry->second;
+  // Get the rounding mode operand
+  MachineOperand &Op = MI.getOperand(Desc.first);
+  // Update the rounding mode if needed
+  if (Op.getImm() == PTXRoundingMode::RndDefault)
+    Op.setImm(Desc.second);
+}
+
+/// Creates a PTXFPRoundingModePass for \p TM, forwarding \p OptLevel to its
+/// constructor.
+///
+/// \returns a pass object allocated with new; it is not released here.
+FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
+                                                CodeGenOpt::Level OptLevel) {
+  PTXFPRoundingModePass *Pass = new PTXFPRoundingModePass(TM, OptLevel);
+  return Pass;
+}
+
OpenPOWER on IntegriCloud