Diffstat (limited to 'contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp')
-rw-r--r--  contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp  |  520
1 file changed, 496 insertions(+), 24 deletions(-)
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 80c000d..f28e8b3 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -14,11 +14,13 @@
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -27,8 +29,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
+UseMipsTailCalls("mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: permit tail calls."), cl::init(false));
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
cl::desc("Expand double precision loads and "
@@ -92,6 +94,44 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
+ // f16 is a storage-only type; always promote it to f32.
+ addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::SRA);
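The block of setOperationAction(..., MVT::f16, Promote) calls added above marks every floating-point opcode as promoted for the storage-only f16 type, so the legalizer rewrites each f16 operation in terms of f32. A minimal sketch of the same registrations written as a data-driven loop (the OpcodesToPromote table is hypothetical and not part of this change):

    // Sketch only: equivalent registration of a few of the promoted opcodes.
    static const unsigned OpcodesToPromote[] = {
        ISD::FADD, ISD::FSUB, ISD::FMUL,  ISD::FDIV,    ISD::FMA,
        ISD::FNEG, ISD::FSQRT, ISD::FRINT, ISD::FMINNUM, ISD::FMAXNUM};
    for (unsigned Opc : OpcodesToPromote)
      setOperationAction(Opc, MVT::f16, Promote);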
@@ -852,7 +892,7 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
+ unsigned EltSize = Ty.getScalarSizeInBits();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
if (!Subtarget.hasDSP())
@@ -1172,13 +1212,25 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitFEXP2_W_1(MI, BB);
case Mips::FEXP2_D_1_PSEUDO:
return emitFEXP2_D_1(MI, BB);
+ case Mips::ST_F16:
+ return emitST_F16_PSEUDO(MI, BB);
+ case Mips::LD_F16:
+ return emitLD_F16_PSEUDO(MI, BB);
+ case Mips::MSA_FP_EXTEND_W_PSEUDO:
+ return emitFPEXTEND_PSEUDO(MI, BB, false);
+ case Mips::MSA_FP_ROUND_W_PSEUDO:
+ return emitFPROUND_PSEUDO(MI, BB, false);
+ case Mips::MSA_FP_EXTEND_D_PSEUDO:
+ return emitFPEXTEND_PSEUDO(MI, BB, true);
+ case Mips::MSA_FP_ROUND_D_PSEUDO:
+ return emitFPROUND_PSEUDO(MI, BB, true);
}
}
bool MipsSETargetLowering::isEligibleForTailCallOptimization(
const CCState &CCInfo, unsigned NextStackOffset,
const MipsFunctionInfo &FI) const {
- if (!EnableMipsTailCalls)
+ if (!UseMipsTailCalls)
return false;
// Exception has to be cleared with eret.
@@ -1406,9 +1458,12 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
return Result;
}
-static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
- return DAG.getConstant(Op->getConstantOperandVal(ImmOp), SDLoc(Op),
- Op->getValueType(0));
+static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
+ bool IsSigned = false) {
+ return DAG.getConstant(
+ APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
+ Op->getConstantOperandVal(ImmOp), IsSigned),
+ SDLoc(Op), Op->getValueType(0));
}
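The rewritten lowerMSASplatImm builds the splat constant at the element's own width and can treat the immediate as signed, so a negative immediate such as ldi.b -3 splats the intended bit pattern instead of handing the raw zero-extended 64-bit value to getConstant. A self-contained sketch of the element value that results (function name and values are illustrative only):

    #include "llvm/ADT/APInt.h"

    // Sketch: the element produced for something like ceqi.b with an
    // immediate of -3. Raw is what getConstantOperandVal() would return for
    // an i32 constant -3 (zero-extended to 64 bits).
    static llvm::APInt splatElementForImm() {
      uint64_t Raw = 0xFFFFFFFDULL;
      // Built at the real element width (8 bits), the value truncates to
      // 0xFD, i.e. -3 as an i8; IsSigned records that the immediate is
      // a signed quantity.
      return llvm::APInt(/*numBits=*/8, Raw, /*isSigned=*/true);
    }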
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
@@ -1504,7 +1559,7 @@ static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT ResTy = Op->getValueType(0);
- APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
+ APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
<< cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
@@ -1514,8 +1569,8 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
-
- switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+ unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
+ switch (Intrinsic) {
default:
return SDValue();
case Intrinsic::mips_shilo:
@@ -1585,6 +1640,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
// binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
EVT VecTy = Op->getValueType(0);
EVT EltTy = VecTy.getVectorElementType();
+ if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
+ report_fatal_error("Immediate out of range");
APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
Op->getConstantOperandVal(3));
return DAG.getNode(ISD::VSELECT, DL, VecTy,
@@ -1598,6 +1655,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
// binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
EVT VecTy = Op->getValueType(0);
EVT EltTy = VecTy.getVectorElementType();
+ if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
+ report_fatal_error("Immediate out of range");
APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
Op->getConstantOperandVal(3));
return DAG.getNode(ISD::VSELECT, DL, VecTy,
@@ -1691,7 +1750,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_ceqi_w:
case Intrinsic::mips_ceqi_d:
return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
- lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ);
+ lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
case Intrinsic::mips_cle_s_b:
case Intrinsic::mips_cle_s_h:
case Intrinsic::mips_cle_s_w:
@@ -1703,7 +1762,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_clei_s_w:
case Intrinsic::mips_clei_s_d:
return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
- lowerMSASplatImm(Op, 2, DAG), ISD::SETLE);
+ lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
case Intrinsic::mips_cle_u_b:
case Intrinsic::mips_cle_u_h:
case Intrinsic::mips_cle_u_w:
@@ -1727,7 +1786,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_clti_s_w:
case Intrinsic::mips_clti_s_d:
return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
- lowerMSASplatImm(Op, 2, DAG), ISD::SETLT);
+ lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
case Intrinsic::mips_clt_u_b:
case Intrinsic::mips_clt_u_h:
case Intrinsic::mips_clt_u_w:
@@ -1940,15 +1999,28 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_insve_b:
case Intrinsic::mips_insve_h:
case Intrinsic::mips_insve_w:
- case Intrinsic::mips_insve_d:
+ case Intrinsic::mips_insve_d: {
+ // Report an error for out of range values.
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::mips_insve_b: Max = 15; break;
+ case Intrinsic::mips_insve_h: Max = 7; break;
+ case Intrinsic::mips_insve_w: Max = 3; break;
+ case Intrinsic::mips_insve_d: Max = 1; break;
+ default: llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
DAG.getConstant(0, DL, MVT::i32));
+ }
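This bounds check is the first of several: the sat, shf, sldi, srari and srlri cases below repeat essentially the same read-immediate, compare-against-Max, report_fatal_error sequence. Purely to illustrate that shared shape (this helper is hypothetical and not part of the change):

    // Hypothetical helper: read operand ImmOp of the intrinsic node as a
    // signed value and abort codegen if it lies outside [0, Max]. This is
    // the same logic the individual cases open-code.
    static void verifyIntrinsicImmediate(SDValue Op, unsigned ImmOp,
                                         int64_t Max) {
      int64_t Value =
          cast<ConstantSDNode>(Op->getOperand(ImmOp))->getSExtValue();
      if (Value < 0 || Value > Max)
        report_fatal_error("Immediate out of range");
    }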
case Intrinsic::mips_ldi_b:
case Intrinsic::mips_ldi_h:
case Intrinsic::mips_ldi_w:
case Intrinsic::mips_ldi_d:
- return lowerMSASplatImm(Op, 1, DAG);
+ return lowerMSASplatImm(Op, 1, DAG, true);
case Intrinsic::mips_lsa:
case Intrinsic::mips_dlsa: {
EVT ResTy = Op->getValueType(0);
@@ -1982,7 +2054,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_maxi_s_w:
case Intrinsic::mips_maxi_s_d:
return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
- Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
case Intrinsic::mips_maxi_u_b:
case Intrinsic::mips_maxi_u_h:
case Intrinsic::mips_maxi_u_w:
@@ -2006,7 +2078,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_mini_s_w:
case Intrinsic::mips_mini_s_d:
return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
- Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
case Intrinsic::mips_mini_u_b:
case Intrinsic::mips_mini_u_h:
case Intrinsic::mips_mini_u_w:
@@ -2079,11 +2151,59 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_pcnt_w:
case Intrinsic::mips_pcnt_d:
return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_sat_s_b:
+ case Intrinsic::mips_sat_s_h:
+ case Intrinsic::mips_sat_s_w:
+ case Intrinsic::mips_sat_s_d:
+ case Intrinsic::mips_sat_u_b:
+ case Intrinsic::mips_sat_u_h:
+ case Intrinsic::mips_sat_u_w:
+ case Intrinsic::mips_sat_u_d: {
+ // Report an error for out of range values.
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::mips_sat_s_b:
+ case Intrinsic::mips_sat_u_b: Max = 7; break;
+ case Intrinsic::mips_sat_s_h:
+ case Intrinsic::mips_sat_u_h: Max = 15; break;
+ case Intrinsic::mips_sat_s_w:
+ case Intrinsic::mips_sat_u_w: Max = 31; break;
+ case Intrinsic::mips_sat_s_d:
+ case Intrinsic::mips_sat_u_d: Max = 63; break;
+ default: llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
+ return SDValue();
+ }
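For bit-count immediates such as sat, srari and srlri, the legal range is simply 0 through (element width - 1), so the hard-coded Max tables could in principle be derived from the result type. A hypothetical sketch (not what the change does):

    // Hypothetical: derive the upper bound for a bit-count immediate from
    // the vector element width: 7 for .b, 15 for .h, 31 for .w, 63 for .d.
    static int64_t maxBitCountImmediate(SDValue Op) {
      EVT VecTy = Op->getValueType(0);
      return VecTy.getScalarSizeInBits() - 1;
    }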
case Intrinsic::mips_shf_b:
case Intrinsic::mips_shf_h:
- case Intrinsic::mips_shf_w:
+ case Intrinsic::mips_shf_w: {
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > 255)
+ report_fatal_error("Immediate out of range");
return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
Op->getOperand(2), Op->getOperand(1));
+ }
+ case Intrinsic::mips_sldi_b:
+ case Intrinsic::mips_sldi_h:
+ case Intrinsic::mips_sldi_w:
+ case Intrinsic::mips_sldi_d: {
+ // Report an error for out of range values.
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::mips_sldi_b: Max = 15; break;
+ case Intrinsic::mips_sldi_h: Max = 7; break;
+ case Intrinsic::mips_sldi_w: Max = 3; break;
+ case Intrinsic::mips_sldi_d: Max = 1; break;
+ default: llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
+ return SDValue();
+ }
case Intrinsic::mips_sll_b:
case Intrinsic::mips_sll_h:
case Intrinsic::mips_sll_w:
@@ -2126,6 +2246,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_srai_d:
return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_srari_b:
+ case Intrinsic::mips_srari_h:
+ case Intrinsic::mips_srari_w:
+ case Intrinsic::mips_srari_d: {
+ // Report an error for out of range values.
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::mips_srari_b: Max = 7; break;
+ case Intrinsic::mips_srari_h: Max = 15; break;
+ case Intrinsic::mips_srari_w: Max = 31; break;
+ case Intrinsic::mips_srari_d: Max = 63; break;
+ default: llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
+ return SDValue();
+ }
case Intrinsic::mips_srl_b:
case Intrinsic::mips_srl_h:
case Intrinsic::mips_srl_w:
@@ -2138,6 +2276,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_srli_d:
return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_srlri_b:
+ case Intrinsic::mips_srlri_h:
+ case Intrinsic::mips_srlri_w:
+ case Intrinsic::mips_srlri_d: {
+ // Report an error for out of range values.
+ int64_t Max;
+ switch (Intrinsic) {
+ case Intrinsic::mips_srlri_b: Max = 7; break;
+ case Intrinsic::mips_srlri_h: Max = 15; break;
+ case Intrinsic::mips_srlri_w: Max = 31; break;
+ case Intrinsic::mips_srlri_d: Max = 63; break;
+ default: llvm_unreachable("Unmatched intrinsic");
+ }
+ int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+ if (Value < 0 || Value > Max)
+ report_fatal_error("Immediate out of range");
+ return SDValue();
+ }
case Intrinsic::mips_subv_b:
case Intrinsic::mips_subv_h:
case Intrinsic::mips_subv_w:
@@ -2169,7 +2325,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
-static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
+ const MipsSubtarget &Subtarget) {
SDLoc DL(Op);
SDValue ChainIn = Op->getOperand(0);
SDValue Address = Op->getOperand(2);
@@ -2177,6 +2334,12 @@ static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
EVT ResTy = Op->getValueType(0);
EVT PtrTy = Address->getValueType(0);
+ // For N64, addresses have the underlying type MVT::i64. This intrinsic,
+ // however, takes an i32 signed constant offset. The actual type of the
+ // intrinsic is a scaled signed i10.
+ if (Subtarget.isABI_N64())
+ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
+
Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
/* Alignment = */ 16);
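The sign-extension added above is what keeps negative ld/st offsets meaningful under N64, where the address arithmetic is done in i64. A standalone illustration with plain APInt arithmetic (values invented for the example):

    #include "llvm/ADT/APInt.h"

    // Sketch: a -16 byte offset arriving as an i32 immediate.
    static void offsetWideningExample() {
      llvm::APInt Off32(32, uint64_t(-16), /*isSigned=*/true); // 0xFFFFFFF0
      llvm::APInt Good = Off32.sext(64); // 0xFFFFFFFFFFFFFFF0 -> base - 16
      llvm::APInt Bad = Off32.zext(64);  // 0x00000000FFFFFFF0 -> base + ~4 GiB
      (void)Good;
      (void)Bad;
    }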
@@ -2232,11 +2395,12 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::mips_ld_h:
case Intrinsic::mips_ld_w:
case Intrinsic::mips_ld_d:
- return lowerMSALoadIntr(Op, DAG, Intr);
+ return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
}
}
-static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
+ const MipsSubtarget &Subtarget) {
SDLoc DL(Op);
SDValue ChainIn = Op->getOperand(0);
SDValue Value = Op->getOperand(2);
@@ -2244,6 +2408,12 @@ static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
SDValue Offset = Op->getOperand(4);
EVT PtrTy = Address->getValueType(0);
+ // For N64, addresses have the underlying type MVT::i64. This intrinsic,
+ // however, takes an i32 signed constant offset. The actual type of the
+ // intrinsic is a scaled signed i10.
+ if (Subtarget.isABI_N64())
+ Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
+
Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
@@ -2260,7 +2430,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::mips_st_h:
case Intrinsic::mips_st_w:
case Intrinsic::mips_st_d:
- return lowerMSAStoreIntr(Op, DAG, Intr);
+ return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
}
}
@@ -3327,8 +3497,12 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
unsigned Wd = MI.getOperand(0).getReg();
unsigned Fs = MI.getOperand(1).getReg();
- unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
- unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned Wt1 = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
+ : &Mips::MSA128WEvensRegClass);
+ unsigned Wt2 = RegInfo.createVirtualRegister(
+ Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
+ : &Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
@@ -3372,6 +3546,304 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
return BB;
}
+// Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
+// register.
+//
+// ST_F16 MSA128F16:$wd, mem_simm10:$addr
+// =>
+// copy_u.h $rtemp,$wd[0]
+// sh $rtemp, $addr
+//
+// Safety: We can't use st.h & co as they would overwrite the memory after
+// the destination. It would require half floats to be allocated 16 bytes(!)
+// of space.
+MachineBasicBlock *
+MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Ws = MI.getOperand(0).getReg();
+ unsigned Rt = MI.getOperand(1).getReg();
+ const MachineMemOperand &MMO = **MI.memoperands_begin();
+ unsigned Imm = MMO.getOffset();
+
+ // Caution: A load via the GOT can expand to a GPR32 operand, while a load
+ // via spill and reload can expand as a GPR64 operand. Examine the operand
+ // in detail and default to the ABI register class.
+ const TargetRegisterClass *RC =
+ MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
+ : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
+ : &Mips::GPR64RegClass);
+ const bool UsingMips32 = RC == &Mips::GPR32RegClass;
+ unsigned Rs = RegInfo.createVirtualRegister(RC);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
+ BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
+ .addReg(Rs)
+ .addReg(Rt)
+ .addImm(Imm)
+ .addMemOperand(BB->getParent()->getMachineMemOperand(
+ &MMO, MMO.getOffset(), MMO.getSize()));
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+// Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA register.
+//
+// LD_F16 MSA128F16:$wd, mem_simm10:$addr
+// =>
+// lh $rtemp, $addr
+// fill.h $wd, $rtemp
+//
+// Safety: We can't use ld.h & co as they over-read from the source.
+// Additionally, if the address is not a multiple of 16, two cases can occur:
+// a) Segmentation fault, as the load instruction reads from a memory page
+// it's not supposed to.
+// b) The load crosses an implementation-specific boundary, requiring OS
+// intervention.
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Wd = MI.getOperand(0).getReg();
+
+ // Caution: A load via the GOT can expand to a GPR32 operand, while a load
+ // via spill and reload can expand as a GPR64 operand. Examine the operand
+ // in detail and default to the ABI register class.
+ const TargetRegisterClass *RC =
+ MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
+ : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
+ : &Mips::GPR64RegClass);
+
+ const bool UsingMips32 = RC == &Mips::GPR32RegClass;
+ unsigned Rt = RegInfo.createVirtualRegister(RC);
+
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
+ for (unsigned i = 1; i < MI.getNumOperands(); i++)
+ MIB.addOperand(MI.getOperand(i));
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+// Emit the FPROUND_PSEUDO instruction.
+//
+// Round an FGR64Opnd, FGR32Opnd to an f16.
+//
+// Safety: Cycle the operand through the GPRs so the result always ends up
+// in the correct MSA register.
+//
+// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
+// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
+// (which they can be, as the MSA registers are defined to alias the
+// FPU's 64 bit and 32 bit registers) the result can be accessed using
+// the correct register class. That requires operands be tie-able across
+// register classes which have a sub/super register class relationship.
+//
+// For FGR32Opnd:
+//
+// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
+// =>
+// mfc1 $rtemp, $fs
+// fill.w $rtemp, $wtemp
+// fexdo.w $wd, $wtemp, $wtemp
+//
+// For FGR64Opnd on mips32r2+:
+//
+// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
+// =>
+// mfc1 $rtemp, $fs
+// fill.w $rtemp, $wtemp
+// mfhc1 $rtemp2, $fs
+// insert.w $wtemp[1], $rtemp2
+// insert.w $wtemp[3], $rtemp2
+// fexdo.w $wtemp2, $wtemp, $wtemp
+// fexdo.h $wd, $wtemp2, $wtemp2
+//
+// For FGR64Opnd on mips64r2+:
+//
+// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
+// =>
+// dmfc1 $rtemp, $fs
+// fill.d $rtemp, $wtemp
+// fexdo.w $wtemp2, $wtemp, $wtemp
+// fexdo.h $wd, $wtemp2, $wtemp2
+//
+// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
+// undef bits are "just right" and the exception enable bits are
+// set. By using fill.w to replicate $fs into all elements rather
+// than insert.w for one element, we avoid that potential case. If
+// fexdo.[hw] causes an exception, the exception is valid and it
+// occurs for all elements.
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool IsFGR64) const {
+
+ // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
+ // here. It's technically doable to support MIPS32 here, but the ISA forbids
+ // it.
+ assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
+
+ bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Wd = MI.getOperand(0).getReg();
+ unsigned Fs = MI.getOperand(1).getReg();
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ const TargetRegisterClass *GPRRC =
+ IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1;
+ unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
+
+ // Perform the register class copy as mentioned above.
+ unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
+ BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
+ unsigned WPHI = Wtemp;
+
+ if (!Subtarget.hasMips64() && IsFGR64) {
+ unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
+ unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
+ .addReg(Wtemp)
+ .addReg(Rtemp2)
+ .addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
+ .addReg(Wtemp2)
+ .addReg(Rtemp2)
+ .addImm(3);
+ WPHI = Wtemp3;
+ }
+
+ if (IsFGR64) {
+ unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
+ .addReg(WPHI)
+ .addReg(WPHI);
+ WPHI = Wtemp2;
+ }
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+// Emit the FPEXTEND_PSEUDO instruction.
+//
+// Expand an f16 to either an FGR32Opnd or an FGR64Opnd.
+//
+// Safety: Cycle the result through the GPRs so it always ends up
+// in the correct floating point register.
+//
+// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
+// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
+// (which they can be, as the MSA registers are defined to alias the
+// FPU's 64 bit and 32 bit registers) the result can be accessed using
+// the correct register class. That requires operands be tie-able across
+// register classes which have a sub/super register class relationship. I
+// haven't checked.
+//
+// For FGR32Opnd:
+//
+// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
+// =>
+// fexupr.w $wtemp, $ws
+// copy_s.w $rtemp, $ws[0]
+// mtc1 $rtemp, $fd
+//
+// For FGR64Opnd on Mips64:
+//
+// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
+// =>
+// fexupr.w $wtemp, $ws
+// fexupr.d $wtemp2, $wtemp
+// copy_s.d $rtemp, $wtemp2[0]
+// dmtc1 $rtemp, $fd
+//
+// For FGR64Opnd on Mips32:
+//
+// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
+// =>
+// fexupr.w $wtemp, $ws
+// fexupr.d $wtemp2, $wtemp
+// copy_s.w $rtemp, $wtemp2[0]
+// mtc1 $rtemp, $ftemp
+// copy_s.w $rtemp2, $wtemp2[1]
+// $fd = mthc1 $rtemp2, $ftemp
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool IsFGR64) const {
+
+ // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
+ // here. It's technically doable to support MIPS32 here, but the ISA forbids
+ // it.
+ assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
+
+ bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+ bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Fd = MI.getOperand(0).getReg();
+ unsigned Ws = MI.getOperand(1).getReg();
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *GPRRC =
+ IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1;
+ unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
+
+ unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned WPHI = Wtemp;
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
+ if (IsFGR64) {
+ WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
+ }
+
+ // Perform the safety regclass copy mentioned above.
+ unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
+ unsigned FPRPHI = IsFGR64onMips32
+ ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
+ : Fd;
+ BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
+ BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
+
+ if (IsFGR64onMips32) {
+ unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
+ .addReg(WPHI)
+ .addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
+ .addReg(FPRPHI)
+ .addReg(Rtemp2);
+ }
+
+ MI.eraseFromParent();
+ return BB;
+}
+
// Emit the FEXP2_W_1 pseudo instructions.
//
// fexp2_w_1_pseudo $wd, $wt