Diffstat (limited to 'contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 520
1 file changed, 496 insertions, 24 deletions
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 80c000d..f28e8b3 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -14,11 +14,13 @@
 #include "MipsMachineFunction.h"
 #include "MipsRegisterInfo.h"
 #include "MipsTargetMachine.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
@@ -27,8 +29,8 @@ using namespace llvm;
 #define DEBUG_TYPE "mips-isel"
 
 static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
-                    cl::desc("MIPS: Enable tail calls."), cl::init(false));
+UseMipsTailCalls("mips-tail-calls", cl::Hidden,
+                 cl::desc("MIPS: permit tail calls."), cl::init(false));
 
 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                    cl::desc("Expand double precision loads and "
@@ -92,6 +94,44 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
     addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
     addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
 
+    // f16 is a storage-only type, always promote it to f32.
+    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
+    setOperationAction(ISD::SETCC, MVT::f16, Promote);
+    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+    setOperationAction(ISD::SELECT, MVT::f16, Promote);
+    setOperationAction(ISD::FADD, MVT::f16, Promote);
+    setOperationAction(ISD::FSUB, MVT::f16, Promote);
+    setOperationAction(ISD::FMUL, MVT::f16, Promote);
+    setOperationAction(ISD::FDIV, MVT::f16, Promote);
+    setOperationAction(ISD::FREM, MVT::f16, Promote);
+    setOperationAction(ISD::FMA, MVT::f16, Promote);
+    setOperationAction(ISD::FNEG, MVT::f16, Promote);
+    setOperationAction(ISD::FABS, MVT::f16, Promote);
+    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+    setOperationAction(ISD::FCOS, MVT::f16, Promote);
+    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
+    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+    setOperationAction(ISD::FPOW, MVT::f16, Promote);
+    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+    setOperationAction(ISD::FRINT, MVT::f16, Promote);
+    setOperationAction(ISD::FSIN, MVT::f16, Promote);
+    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+    setOperationAction(ISD::FEXP, MVT::f16, Promote);
+    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+    setOperationAction(ISD::FROUND, MVT::f16, Promote);
+    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+    setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+    setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+
     setTargetDAGCombine(ISD::AND);
     setTargetDAGCombine(ISD::OR);
     setTargetDAGCombine(ISD::SRA);
@@ -852,7 +892,7 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
   APInt SplatValue, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
+  unsigned EltSize = Ty.getScalarSizeInBits();
   BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
 
   if (!Subtarget.hasDSP())
@@ -1172,13 +1212,25 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return emitFEXP2_W_1(MI, BB);
   case Mips::FEXP2_D_1_PSEUDO:
     return emitFEXP2_D_1(MI, BB);
+  case Mips::ST_F16:
+    return emitST_F16_PSEUDO(MI, BB);
+  case Mips::LD_F16:
+    return emitLD_F16_PSEUDO(MI, BB);
+  case Mips::MSA_FP_EXTEND_W_PSEUDO:
+    return emitFPEXTEND_PSEUDO(MI, BB, false);
+  case Mips::MSA_FP_ROUND_W_PSEUDO:
+    return emitFPROUND_PSEUDO(MI, BB, false);
+  case Mips::MSA_FP_EXTEND_D_PSEUDO:
+    return emitFPEXTEND_PSEUDO(MI, BB, true);
+  case Mips::MSA_FP_ROUND_D_PSEUDO:
+    return emitFPROUND_PSEUDO(MI, BB, true);
   }
 }
 
 bool MipsSETargetLowering::isEligibleForTailCallOptimization(
     const CCState &CCInfo, unsigned NextStackOffset,
     const MipsFunctionInfo &FI) const {
-  if (!EnableMipsTailCalls)
+  if (!UseMipsTailCalls)
     return false;
 
   // Exception has to be cleared with eret.
@@ -1406,9 +1458,12 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
   return Result;
 }
 
-static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
-  return DAG.getConstant(Op->getConstantOperandVal(ImmOp), SDLoc(Op),
-                         Op->getValueType(0));
+static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
+                                bool IsSigned = false) {
+  return DAG.getConstant(
+      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
+            Op->getConstantOperandVal(ImmOp), IsSigned),
+      SDLoc(Op), Op->getValueType(0));
 }
 
 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
@@ -1504,7 +1559,7 @@ static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
   SDLoc DL(Op);
   EVT ResTy = Op->getValueType(0);
-  APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
+  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                  << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
   SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
 
@@ -1514,8 +1569,8 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
   SDLoc DL(Op);
-
-  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+  unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
+  switch (Intrinsic) {
   default:
     return SDValue();
   case Intrinsic::mips_shilo:
@@ -1585,6 +1640,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
     // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
     EVT VecTy = Op->getValueType(0);
     EVT EltTy = VecTy.getVectorElementType();
+    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
+      report_fatal_error("Immediate out of range");
     APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
                                        Op->getConstantOperandVal(3));
     return DAG.getNode(ISD::VSELECT, DL, VecTy,
@@ -1598,6 +1655,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
     // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
     EVT EltTy = VecTy.getVectorElementType();
+    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
+      report_fatal_error("Immediate out of range");
     APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
                                       Op->getConstantOperandVal(3));
     return DAG.getNode(ISD::VSELECT, DL, VecTy,
@@ -1691,7 +1750,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_ceqi_w:
   case Intrinsic::mips_ceqi_d:
     return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
-                        lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ);
+                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
   case Intrinsic::mips_cle_s_b:
   case Intrinsic::mips_cle_s_h:
   case Intrinsic::mips_cle_s_w:
@@ -1703,7 +1762,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_clei_s_w:
   case Intrinsic::mips_clei_s_d:
     return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
-                        lowerMSASplatImm(Op, 2, DAG), ISD::SETLE);
+                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
   case Intrinsic::mips_cle_u_b:
   case Intrinsic::mips_cle_u_h:
   case Intrinsic::mips_cle_u_w:
@@ -1727,7 +1786,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_clti_s_w:
   case Intrinsic::mips_clti_s_d:
     return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
-                        lowerMSASplatImm(Op, 2, DAG), ISD::SETLT);
+                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
   case Intrinsic::mips_clt_u_b:
   case Intrinsic::mips_clt_u_h:
   case Intrinsic::mips_clt_u_w:
@@ -1940,15 +1999,28 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_insve_b:
   case Intrinsic::mips_insve_h:
   case Intrinsic::mips_insve_w:
-  case Intrinsic::mips_insve_d:
+  case Intrinsic::mips_insve_d: {
+    // Report an error for out of range values.
+    int64_t Max;
+    switch (Intrinsic) {
+    case Intrinsic::mips_insve_b: Max = 15; break;
+    case Intrinsic::mips_insve_h: Max = 7; break;
+    case Intrinsic::mips_insve_w: Max = 3; break;
+    case Intrinsic::mips_insve_d: Max = 1; break;
+    default: llvm_unreachable("Unmatched intrinsic");
+    }
+    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+    if (Value < 0 || Value > Max)
+      report_fatal_error("Immediate out of range");
     return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
                        Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
                        DAG.getConstant(0, DL, MVT::i32));
+  }
   case Intrinsic::mips_ldi_b:
   case Intrinsic::mips_ldi_h:
   case Intrinsic::mips_ldi_w:
   case Intrinsic::mips_ldi_d:
-    return lowerMSASplatImm(Op, 1, DAG);
+    return lowerMSASplatImm(Op, 1, DAG, true);
   case Intrinsic::mips_lsa:
   case Intrinsic::mips_dlsa: {
     EVT ResTy = Op->getValueType(0);
@@ -1982,7 +2054,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_maxi_s_w:
   case Intrinsic::mips_maxi_s_d:
     return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
-                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
   case Intrinsic::mips_maxi_u_b:
   case Intrinsic::mips_maxi_u_h:
   case Intrinsic::mips_maxi_u_w:
@@ -2006,7 +2078,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_mini_s_w:
   case Intrinsic::mips_mini_s_d:
     return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
-                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
   case Intrinsic::mips_mini_u_b:
   case Intrinsic::mips_mini_u_h:
   case Intrinsic::mips_mini_u_w:
@@ -2079,11 +2151,59 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_pcnt_w:
   case Intrinsic::mips_pcnt_d:
     return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
+  case Intrinsic::mips_sat_s_b:
+  case Intrinsic::mips_sat_s_h:
+  case Intrinsic::mips_sat_s_w:
+  case Intrinsic::mips_sat_s_d:
+  case Intrinsic::mips_sat_u_b:
+  case Intrinsic::mips_sat_u_h:
+  case Intrinsic::mips_sat_u_w:
+  case Intrinsic::mips_sat_u_d: {
+    // Report an error for out of range values.
+    int64_t Max;
+    switch (Intrinsic) {
+    case Intrinsic::mips_sat_s_b:
+    case Intrinsic::mips_sat_u_b: Max = 7; break;
+    case Intrinsic::mips_sat_s_h:
+    case Intrinsic::mips_sat_u_h: Max = 15; break;
+    case Intrinsic::mips_sat_s_w:
+    case Intrinsic::mips_sat_u_w: Max = 31; break;
+    case Intrinsic::mips_sat_s_d:
+    case Intrinsic::mips_sat_u_d: Max = 63; break;
+    default: llvm_unreachable("Unmatched intrinsic");
+    }
+    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+    if (Value < 0 || Value > Max)
+      report_fatal_error("Immediate out of range");
+    return SDValue();
+  }
   case Intrinsic::mips_shf_b:
   case Intrinsic::mips_shf_h:
-  case Intrinsic::mips_shf_w:
+  case Intrinsic::mips_shf_w: {
+    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+    if (Value < 0 || Value > 255)
+      report_fatal_error("Immediate out of range");
     return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
                        Op->getOperand(2), Op->getOperand(1));
+  }
+  case Intrinsic::mips_sldi_b:
+  case Intrinsic::mips_sldi_h:
+  case Intrinsic::mips_sldi_w:
+  case Intrinsic::mips_sldi_d: {
+    // Report an error for out of range values.
+    int64_t Max;
+    switch (Intrinsic) {
+    case Intrinsic::mips_sldi_b: Max = 15; break;
+    case Intrinsic::mips_sldi_h: Max = 7; break;
+    case Intrinsic::mips_sldi_w: Max = 3; break;
+    case Intrinsic::mips_sldi_d: Max = 1; break;
+    default: llvm_unreachable("Unmatched intrinsic");
    }
+    int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
+    if (Value < 0 || Value > Max)
+      report_fatal_error("Immediate out of range");
+    return SDValue();
+  }
   case Intrinsic::mips_sll_b:
   case Intrinsic::mips_sll_h:
   case Intrinsic::mips_sll_w:
@@ -2126,6 +2246,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_srai_d:
     return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
                        Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+  case Intrinsic::mips_srari_b:
+  case Intrinsic::mips_srari_h:
+  case Intrinsic::mips_srari_w:
+  case Intrinsic::mips_srari_d: {
+    // Report an error for out of range values.
+    int64_t Max;
+    switch (Intrinsic) {
+    case Intrinsic::mips_srari_b: Max = 7; break;
+    case Intrinsic::mips_srari_h: Max = 15; break;
+    case Intrinsic::mips_srari_w: Max = 31; break;
+    case Intrinsic::mips_srari_d: Max = 63; break;
+    default: llvm_unreachable("Unmatched intrinsic");
+    }
+    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+    if (Value < 0 || Value > Max)
+      report_fatal_error("Immediate out of range");
+    return SDValue();
+  }
   case Intrinsic::mips_srl_b:
   case Intrinsic::mips_srl_h:
   case Intrinsic::mips_srl_w:
@@ -2138,6 +2276,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_srli_d:
     return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
                        Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+  case Intrinsic::mips_srlri_b:
+  case Intrinsic::mips_srlri_h:
+  case Intrinsic::mips_srlri_w:
+  case Intrinsic::mips_srlri_d: {
+    // Report an error for out of range values.
+    int64_t Max;
+    switch (Intrinsic) {
+    case Intrinsic::mips_srlri_b: Max = 7; break;
+    case Intrinsic::mips_srlri_h: Max = 15; break;
+    case Intrinsic::mips_srlri_w: Max = 31; break;
+    case Intrinsic::mips_srlri_d: Max = 63; break;
+    default: llvm_unreachable("Unmatched intrinsic");
+    }
+    int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
+    if (Value < 0 || Value > Max)
+      report_fatal_error("Immediate out of range");
+    return SDValue();
+  }
   case Intrinsic::mips_subv_b:
   case Intrinsic::mips_subv_h:
   case Intrinsic::mips_subv_w:
@@ -2169,7 +2325,8 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   }
 }
 
-static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
+                                const MipsSubtarget &Subtarget) {
   SDLoc DL(Op);
   SDValue ChainIn = Op->getOperand(0);
   SDValue Address = Op->getOperand(2);
@@ -2177,6 +2334,12 @@ static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
   EVT ResTy = Op->getValueType(0);
   EVT PtrTy = Address->getValueType(0);
 
+  // For N64, addresses have the underlying type MVT::i64. This intrinsic
+  // however takes an i32 signed constant offset. The actual type of the
+  // intrinsic is a scaled signed i10.
+  if (Subtarget.isABI_N64())
+    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
+
   Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
   return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
                      /* Alignment = */ 16);
@@ -2232,11 +2395,12 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
   case Intrinsic::mips_ld_h:
   case Intrinsic::mips_ld_w:
   case Intrinsic::mips_ld_d:
-    return lowerMSALoadIntr(Op, DAG, Intr);
+    return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
   }
 }
 
-static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
+                                 const MipsSubtarget &Subtarget) {
   SDLoc DL(Op);
   SDValue ChainIn = Op->getOperand(0);
   SDValue Value = Op->getOperand(2);
@@ -2244,6 +2408,12 @@ static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
   SDValue Offset = Op->getOperand(4);
   EVT PtrTy = Address->getValueType(0);
 
+  // For N64, addresses have the underlying type MVT::i64. This intrinsic
+  // however takes an i32 signed constant offset. The actual type of the
+  // intrinsic is a scaled signed i10.
+  if (Subtarget.isABI_N64())
+    Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
+
   Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
 
   return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
@@ -2260,7 +2430,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
   case Intrinsic::mips_st_h:
   case Intrinsic::mips_st_w:
   case Intrinsic::mips_st_d:
-    return lowerMSAStoreIntr(Op, DAG, Intr);
+    return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
   }
 }
 
@@ -3327,8 +3497,12 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
   DebugLoc DL = MI.getDebugLoc();
   unsigned Wd = MI.getOperand(0).getReg();
   unsigned Fs = MI.getOperand(1).getReg();
-  unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
-  unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+  unsigned Wt1 = RegInfo.createVirtualRegister(
+      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
+                              : &Mips::MSA128WEvensRegClass);
+  unsigned Wt2 = RegInfo.createVirtualRegister(
+      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
+                              : &Mips::MSA128WEvensRegClass);
 
   BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
   BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
@@ -3372,6 +3546,304 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
   return BB;
 }
 
+// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
+// register.
+//
+// STF16 MSA128F16:$wd, mem_simm10:$addr
+// =>
+//  copy_u.h $rtemp,$wd[0]
+//  sh $rtemp, $addr
+//
+// Safety: We can't use st.h & co as they would overwrite the memory after
+// the destination. It would require half floats be allocated 16 bytes(!) of
+// space.
+MachineBasicBlock *
+MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const {
+
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  unsigned Ws = MI.getOperand(0).getReg();
+  unsigned Rt = MI.getOperand(1).getReg();
+  const MachineMemOperand &MMO = **MI.memoperands_begin();
+  unsigned Imm = MMO.getOffset();
+
+  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
+  //          spill and reload can expand as a GPR64 operand. Examine the
+  //          operand in detail and default to ABI.
+  const TargetRegisterClass *RC =
+      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
+                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
+                                                        : &Mips::GPR64RegClass);
+  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
+  unsigned Rs = RegInfo.createVirtualRegister(RC);
+
+  BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
+  BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
+      .addReg(Rs)
+      .addReg(Rt)
+      .addImm(Imm)
+      .addMemOperand(BB->getParent()->getMachineMemOperand(
+          &MMO, MMO.getOffset(), MMO.getSize()));
+
+  MI.eraseFromParent();
+  return BB;
+}
+
+// Emit the LD_F16_PSEUDO instruction to load a f16 value into an MSA register.
+//
+// LD_F16 MSA128F16:$wd, mem_simm10:$addr
+// =>
+//  lh $rtemp, $addr
+//  fill.h $wd, $rtemp
+//
+// Safety: We can't use ld.h & co as they over-read from the source.
+// Additionally, if the address is not modulo 16, 2 cases can occur:
+//  a) Segmentation fault as the load instruction reads from a memory page
+//     it's not supposed to.
+//  b) The load crosses an implementation specific boundary, requiring OS
+//     intervention.
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const {
+
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  unsigned Wd = MI.getOperand(0).getReg();
+
+  // Caution: A load via the GOT can expand to a GPR32 operand, a load via
+  //          spill and reload can expand as a GPR64 operand. Examine the
+  //          operand in detail and default to ABI.
+  const TargetRegisterClass *RC =
+      MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
+                               : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
+                                                        : &Mips::GPR64RegClass);
+
+  const bool UsingMips32 = RC == &Mips::GPR32RegClass;
+  unsigned Rt = RegInfo.createVirtualRegister(RC);
+
+  MachineInstrBuilder MIB =
+      BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
+  for (unsigned i = 1; i < MI.getNumOperands(); i++)
+    MIB.addOperand(MI.getOperand(i));
+
+  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
+
+  MI.eraseFromParent();
+  return BB;
+}
+
+// Emit the FPROUND_PSEUDO instruction.
+//
+// Round an FGR64Opnd, FGR32Opnd to an f16.
+//
+// Safety: Cycle the operand through the GPRs so the result always ends up
+//         in the correct MSA register.
+//
+// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
+//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
+//        (which they can be, as the MSA registers are defined to alias the
+//        FPU's 64 bit and 32 bit registers) the result can be accessed using
+//        the correct register class. That requires operands be tie-able across
+//        register classes which have a sub/super register class relationship.
+//
+// For FGR32Opnd:
+//
+// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
+// =>
+//  mfc1 $rtemp, $fs
+//  fill.w $rtemp, $wtemp
+//  fexdo.w $wd, $wtemp, $wtemp
+//
+// For FGR64Opnd on mips32r2+:
+//
+// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
+// =>
+//  mfc1 $rtemp, $fs
+//  fill.w $rtemp, $wtemp
+//  mfhc1 $rtemp2, $fs
+//  insert.w $wtemp[1], $rtemp2
+//  insert.w $wtemp[3], $rtemp2
+//  fexdo.w $wtemp2, $wtemp, $wtemp
+//  fexdo.h $wd, $wtemp2, $wtemp2
+//
+// For FGR64Opnd on mips64r2+:
+//
+// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
+// =>
+//  dmfc1 $rtemp, $fs
+//  fill.d $rtemp, $wtemp
+//  fexdo.w $wtemp2, $wtemp, $wtemp
+//  fexdo.h $wd, $wtemp2, $wtemp2
+//
+// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
+//              undef bits are "just right" and the exception enable bits are
+//              set. By using fill.w to replicate $fs into all elements over
+//              insert.w for one element, we avoid that potential case. If
+//              fexdo.[hw] causes an exception, the exception is valid and it
+//              occurs for all elements.
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
+                                         MachineBasicBlock *BB,
+                                         bool IsFGR64) const {
+
+  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
+  // here. It's technically doable to support MIPS32 here, but the ISA forbids
+  // it.
+  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
+
+  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  unsigned Wd = MI.getOperand(0).getReg();
+  unsigned Fs = MI.getOperand(1).getReg();
+
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+  const TargetRegisterClass *GPRRC =
+      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+  unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1;
+  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
+
+  // Perform the register class copy as mentioned above.
+  unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
+  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
+  BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
+  unsigned WPHI = Wtemp;
+
+  if (!Subtarget.hasMips64() && IsFGR64) {
+    unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+    BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
+    unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+    unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
+        .addReg(Wtemp)
+        .addReg(Rtemp2)
+        .addImm(1);
+    BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
+        .addReg(Wtemp2)
+        .addReg(Rtemp2)
+        .addImm(3);
+    WPHI = Wtemp3;
+  }
+
+  if (IsFGR64) {
+    unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+    BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
+        .addReg(WPHI)
+        .addReg(WPHI);
+    WPHI = Wtemp2;
+  }
+
+  BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
+
+  MI.eraseFromParent();
+  return BB;
+}
+
+// Emit the FPEXTEND_PSEUDO instruction.
+//
+// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
+//
+// Safety: Cycle the result through the GPRs so the result always ends up
+//         in the correct floating point register.
+//
+// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
+//        / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
+//        (which they can be, as the MSA registers are defined to alias the
+//        FPU's 64 bit and 32 bit registers) the result can be accessed using
+//        the correct register class. That requires operands be tie-able across
+//        register classes which have a sub/super register class relationship. I
+//        haven't checked.
+//
+// For FGR32Opnd:
+//
+// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
+// =>
+//  fexupr.w $wtemp, $ws
+//  copy_s.w $rtemp, $ws[0]
+//  mtc1 $rtemp, $fd
+//
+// For FGR64Opnd on Mips64:
+//
+// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
+// =>
+//  fexupr.w $wtemp, $ws
+//  fexupr.d $wtemp2, $wtemp
+//  copy_s.d $rtemp, $wtemp2[0]
+//  dmtc1 $rtemp, $fd
+//
+// For FGR64Opnd on Mips32:
+//
+// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
+// =>
+//  fexupr.w $wtemp, $ws
+//  fexupr.d $wtemp2, $wtemp
+//  copy_s.w $rtemp, $wtemp2[0]
+//  mtc1 $rtemp, $ftemp
+//  copy_s.w $rtemp2, $wtemp2[1]
+//  $fd = mthc1 $rtemp2, $ftemp
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
+                                          MachineBasicBlock *BB,
+                                          bool IsFGR64) const {
+
+  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
+  // here. It's technically doable to support MIPS32 here, but the ISA forbids
+  // it.
+  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
+
+  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
+
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  unsigned Fd = MI.getOperand(0).getReg();
+  unsigned Ws = MI.getOperand(1).getReg();
+
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  const TargetRegisterClass *GPRRC =
+      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+  unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1;
+  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
+
+  unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+  unsigned WPHI = Wtemp;
+
+  BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
+  if (IsFGR64) {
+    WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+    BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
+  }
+
+  // Perform the safety regclass copy mentioned above.
+  unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
+  unsigned FPRPHI = IsFGR64onMips32
+                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
+                        : Fd;
+  BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
+  BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
+
+  if (IsFGR64onMips32) {
+    unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+    BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
+        .addReg(WPHI)
+        .addImm(1);
+    BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
+        .addReg(FPRPHI)
+        .addReg(Rtemp2);
+  }
+
+  MI.eraseFromParent();
+  return BB;
+}
+
 // Emit the FEXP2_W_1 pseudo instructions.
 //
 // fexp2_w_1_pseudo $wd, $wt
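
Editorial note (not part of the patch): several of the hunks above add the same immediate-validation pattern for MSA intrinsics — pick a per-format maximum, read the constant operand with getSExtValue(), and call report_fatal_error("Immediate out of range") when the value is negative or too large. The maxima fall into two families: bit positions (sat/srari/srlri: element width - 1) and lane indices (insve/sldi: 128 / element width - 1). The standalone C++ sketch below only illustrates that arithmetic; the helper names isValidBitImm and isValidLaneImm are hypothetical and do not exist in LLVM.

// Hypothetical, self-contained sketch of the range checks added above.
// It mirrors the bounds the patch hard-codes per intrinsic; it is not part
// of the patch and uses no LLVM types.
#include <cassert>
#include <cstdint>

// Bit-position immediates (mips_sat_*, mips_srari_*, mips_srlri_*):
// valid range is 0 .. EltBits - 1.
static bool isValidBitImm(int64_t Imm, unsigned EltBits) {
  return Imm >= 0 && Imm <= static_cast<int64_t>(EltBits) - 1;
}

// Lane-index immediates (mips_insve_*, mips_sldi_*) on a 128-bit MSA vector:
// valid range is 0 .. (128 / EltBits) - 1.
static bool isValidLaneImm(int64_t Imm, unsigned EltBits) {
  return Imm >= 0 && Imm <= static_cast<int64_t>(128 / EltBits) - 1;
}

int main() {
  // sat_s_h / srari_h style: halfword elements allow 0..15.
  assert(isValidBitImm(15, 16) && !isValidBitImm(16, 16));
  // insve_d / sldi_d style: two doubleword lanes allow 0..1.
  assert(isValidLaneImm(1, 64) && !isValidLaneImm(2, 64));
  return 0;
}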