diff options
Diffstat (limited to 'lib/Target/PTX/PTXFPRoundingModePass.cpp')
-rw-r--r-- | lib/Target/PTX/PTXFPRoundingModePass.cpp | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp new file mode 100644 index 0000000..0b653e0 --- /dev/null +++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp @@ -0,0 +1,179 @@ +//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a machine function pass that sets appropriate FP rounding +// modes for all relevant instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-fp-rounding-mode" + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +// NOTE: PTXFPRoundingModePass should be executed just before emission. + +namespace llvm { + /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to + /// all FP instructions. Essentially, this pass just looks for all FP + /// instructions that have a rounding mode set to RndDefault, and sets an + /// appropriate rounding mode based on the target device. + /// + class PTXFPRoundingModePass : public MachineFunctionPass { + private: + static char ID; + + typedef std::pair<unsigned, unsigned> RndModeDesc; + + PTXTargetMachine& TargetMachine; + DenseMap<unsigned, RndModeDesc> Instrs; + + public: + PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) + : MachineFunctionPass(ID), + TargetMachine(TM) { + initializeMap(); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PTX FP Rounding Mode Pass"; + } + + private: + + void initializeMap(); + void processInstruction(MachineInstr &MI); + }; // class PTXFPRoundingModePass +} // namespace llvm + +using namespace llvm; + +char PTXFPRoundingModePass::ID = 0; + +bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { + // Look at each basic block + for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; + ++bbi) { + MachineBasicBlock &MBB = *bbi; + // Look at each instruction + for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); + ii != ie; ++ii) { + MachineInstr &MI = *ii; + processInstruction(MI); + } + } + return false; +} + +void PTXFPRoundingModePass::initializeMap() { + using namespace PTXRoundingMode; + const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); + + // Build a map of default rounding mode for all instructions that need a + // rounding mode. + Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); + + unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); + + unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); + + Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); + + Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + + Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); +} + +void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { + // Is this an instruction that needs a rounding mode? + if (Instrs.count(MI.getOpcode())) { + const RndModeDesc &Desc = Instrs[MI.getOpcode()]; + // Get the rounding mode operand + MachineOperand &Op = MI.getOperand(Desc.first); + // Update the rounding mode if needed + if (Op.getImm() == PTXRoundingMode::RndDefault) { + Op.setImm(Desc.second); + } + } +} + +FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXFPRoundingModePass(TM, OptLevel); +} + |