summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp')
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp342
1 files changed, 287 insertions, 55 deletions
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 92d3c00..4be26dd 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -24,11 +24,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -97,11 +97,13 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
// Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
if ((MI.getOpcode() == Mips::ADDiu) &&
(MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).isImm()) &&
(MI.getOperand(2).getImm() == 0)) {
DstReg = MI.getOperand(0).getReg();
ZeroReg = Mips::ZERO;
} else if ((MI.getOpcode() == Mips::DADDiu) &&
(MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).isImm()) &&
(MI.getOperand(2).getImm() == 0)) {
DstReg = MI.getOperand(0).getReg();
ZeroReg = Mips::ZERO_64;
@@ -243,46 +245,64 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
}
}
-void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
- SDValue CmpLHS, const SDLoc &DL,
- SDNode *Node) const {
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
-
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu;
- if (Subtarget->isGP64bit()) {
- SLTuOp = Mips::SLTu64;
- ADDuOp = Mips::DADDu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
+ SDValue InFlag = Node->getOperand(2);
+ unsigned Opc = InFlag.getOpcode();
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops);
-
- if (Subtarget->isGP64bit()) {
- // On 64-bit targets, sltu produces an i64 but our backend currently says
- // that SLTu64 produces an i32. We need to fix this in the long run but for
- // now, just make the DAG type-correct by asserting the upper bits are zero.
- Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT,
- CurDAG->getTargetConstant(0, DL, VT),
- SDValue(Carry, 0),
- CurDAG->getTargetConstant(Mips::sub_32, DL,
- VT));
+ // In the base case, we can rely on the carry bit from the addsc
+ // instruction.
+ if (Opc == ISD::ADDC) {
+ SDValue Ops[3] = {LHS, RHS, InFlag};
+ CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops);
+ return;
}
- // Generate a second addition only if we know that RHS is not a
- // constant-zero node.
- SDNode *AddCarry = Carry;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
- if (!C || C->getZExtValue())
- AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
+ assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!");
+
+ // The more complex case is when there is a chain of ISD::ADDE nodes like:
+ // (adde (adde (adde (addc a b) c) d) e).
+ //
+ // The addwc instruction does not write to the carry bit, instead it writes
+ // to bit 20 of the dsp control register. To match this series of nodes, each
+ // intermediate adde node must be expanded to write the carry bit before the
+ // addition.
+
+ // Start by reading the overflow field for addsc and moving the value to the
+ // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP
+ // corresponds to reading/writing the entire control register to/from a GPR.
+
+ SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32);
+
+ SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
+
+ SDNode *DSPCtrlField =
+ CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag);
+
+ SDNode *Carry = CurDAG->getMachineNode(
+ Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);
- CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0));
+ SDValue Ops[4] = {SDValue(DSPCtrlField, 0),
+ CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne,
+ SDValue(Carry, 0)};
+ SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops);
+
+ // My reading of the the MIPS DSP 3.01 specification isn't as clear as I
+ // would like about whether bit 20 always gets overwritten by addwc.
+ // Hence take an extremely conservative view and presume it's sticky. We
+ // therefore need to clear it.
+
+ SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)};
+ SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps);
+
+ SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue,
+ SDValue(DSPCtrlFinal, 0), CstOne);
+
+ SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)};
+ CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands);
}
/// Match frameindex
@@ -690,7 +710,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
// as the original value.
if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
EltTy);
return true;
}
@@ -722,7 +742,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
// Extract the run of set bits starting with bit zero, and test that the
// result is the same as the original value
if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
EltTy);
return true;
}
@@ -763,19 +783,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
switch(Opcode) {
default: break;
- case ISD::SUBE: {
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu;
- selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node);
- return true;
- }
-
case ISD::ADDE: {
- if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
- break;
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu;
- selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node);
+ selectAddE(Node, DL);
return true;
}
@@ -932,6 +941,9 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
// same set/ of registers. Similarly, ldi.h isn't capable of producing {
// 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can.
+ const MipsABIInfo &ABI =
+ static_cast<const MipsTargetMachine &>(TM).getABI();
+
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
@@ -969,13 +981,233 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
break;
}
- if (!SplatValue.isSignedIntN(10))
- return false;
+ SDNode *Res;
- SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
- ViaVecTy.getVectorElementType());
-
- SDNode *Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+ // If we have a signed 10 bit integer, we can splat it directly.
+ //
+ // If we have something bigger we can synthesize the value into a GPR and
+ // splat from there.
+ if (SplatValue.isSignedIntN(10)) {
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+ ViaVecTy.getVectorElementType());
+
+ Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+ } else if (SplatValue.isSignedIntN(16) &&
+ ((ABI.IsO32() && SplatBitSize < 64) ||
+ (ABI.IsN32() || ABI.IsN64()))) {
+ // Only handle signed 16 bit values when the element size is GPR width.
+ // MIPS64 can handle all the cases but MIPS32 would need to handle
+ // negative cases specifically here. Instead, handle those cases as
+ // 64bit values.
+
+ bool Is32BitSplat = ABI.IsO32() || SplatBitSize < 64;
+ const unsigned ADDiuOp = Is32BitSplat ? Mips::ADDiu : Mips::DADDiu;
+ const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64;
+ SDValue ZeroVal = CurDAG->getRegister(
+ Is32BitSplat ? Mips::ZERO : Mips::ZERO_64, SplatMVT);
+
+ const unsigned FILLOp =
+ SplatBitSize == 16
+ ? Mips::FILL_H
+ : (SplatBitSize == 32 ? Mips::FILL_W
+ : (SplatBitSize == 64 ? Mips::FILL_D : 0));
+
+ assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!");
+ assert((!ABI.IsO32() || (FILLOp != Mips::FILL_D)) &&
+ "Attempting to use fill.d on MIPS32!");
+
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT);
+
+ Res = CurDAG->getMachineNode(ADDiuOp, DL, SplatMVT, ZeroVal, LoVal);
+ Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) {
+ // Only handle the cases where the splat size agrees with the size
+ // of the SplatValue here.
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!");
+ Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 64 &&
+ (ABI.IsN32() || ABI.IsN64())) {
+ // N32 and N64 can perform some tricks that O32 can't for signed 32 bit
+ // integers due to having 64bit registers. lui will cause the necessary
+ // zero/sign extension.
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ Res = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64),
+ SDValue(Res, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ Res =
+ CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(64)) {
+ // If we have a 64 bit Splat value, we perform a similar sequence to the
+ // above:
+ //
+ // MIPS32: MIPS64:
+ // lui $res, %highest(val) lui $res, %highest(val)
+ // ori $res, $res, %higher(val) ori $res, $res, %higher(val)
+ // lui $res2, %hi(val) lui $res2, %hi(val)
+ // ori $res2, %res2, %lo(val) ori $res2, %res2, %lo(val)
+ // $res3 = fill $res2 dinsu $res, $res2, 0, 32
+ // $res4 = insert.w $res3[1], $res fill.d $res
+ // splat.d $res4, 0
+ //
+ // The ability to use dinsu is guaranteed as MSA requires MIPSR5. This saves
+ // having to materialize the value by shifts and ors.
+ //
+ // FIXME: Implement the preferred sequence for MIPS64R6:
+ //
+ // MIPS64R6:
+ // ori $res, $zero, %lo(val)
+ // daui $res, $res, %hi(val)
+ // dahi $res, $res, %higher(val)
+ // dati $res, $res, %highest(cal)
+ // fill.d $res
+ //
+
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ const unsigned Higher = SplatValue.lshr(32).getLoBits(16).getZExtValue();
+ const unsigned Highest = SplatValue.lshr(48).getLoBits(16).getZExtValue();
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+ SDValue HigherVal = CurDAG->getTargetConstant(Higher, DL, MVT::i32);
+ SDValue HighestVal = CurDAG->getTargetConstant(Highest, DL, MVT::i32);
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ // Independent of whether we're targeting MIPS64 or not, the basic
+ // operations are the same. Also, directly use the $zero register if
+ // the 16 bit chunk is zero.
+ //
+ // For optimization purposes we always synthesize the splat value as
+ // an i32 value, then if we're targetting MIPS64, use SUBREG_TO_REG
+ // just before combining the values with dinsu to produce an i64. This
+ // enables SelectionDAG to aggressively share components of splat values
+ // where possible.
+ //
+ // FIXME: This is the general constant synthesis problem. This code
+ // should be factored out into a class shared between all the
+ // classes that need it. Specifically, for a splat size of 64
+ // bits that's a negative number we can do better than LUi/ORi
+ // for the upper 32bits.
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ SDNode *HiRes;
+ if (Highest)
+ HiRes = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HighestVal);
+
+ if (Higher)
+ HiRes = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Highest ? SDValue(HiRes, 0) : ZeroVal,
+ HigherVal);
+
+
+ if (ABI.IsO32()) {
+ Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32,
+ (Hi || Lo) ? SDValue(Res, 0) : ZeroVal);
+
+ Res = CurDAG->getMachineNode(
+ Mips::INSERT_W, DL, MVT::v4i32, SDValue(Res, 0),
+ (Highest || Higher) ? SDValue(HiRes, 0) : ZeroVal,
+ CurDAG->getTargetConstant(1, DL, MVT::i32));
+
+ const TargetLowering *TLI = getTargetLowering();
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(ViaVecTy.getSimpleVT());
+
+ Res = CurDAG->getMachineNode(
+ Mips::COPY_TO_REGCLASS, DL, ViaVecTy, SDValue(Res, 0),
+ CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32));
+
+ Res = CurDAG->getMachineNode(
+ Mips::SPLATI_D, DL, MVT::v2i64, SDValue(Res, 0),
+ CurDAG->getTargetConstant(0, DL, MVT::i32));
+ } else if (ABI.IsN64() || ABI.IsN32()) {
+
+ SDValue Zero64Val = CurDAG->getRegister(Mips::ZERO_64, MVT::i64);
+ const bool HiResNonZero = Highest || Higher;
+ const bool ResNonZero = Hi || Lo;
+
+ if (HiResNonZero)
+ HiRes = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Highest >> 15) & 0x1), DL, MVT::i64),
+ SDValue(HiRes, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ if (ResNonZero)
+ Res = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64),
+ SDValue(Res, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ // We have 3 cases:
+ // The HiRes is nonzero but Res is $zero => dsll32 HiRes, 0
+ // The Res is nonzero but HiRes is $zero => dinsu Res, $zero, 32, 32
+ // Both are non zero => dinsu Res, HiRes, 32, 32
+ //
+ // The obvious "missing" case is when both are zero, but that case is
+ // handled by the ldi case.
+ if (ResNonZero) {
+ SDValue Ops[4] = {HiResNonZero ? SDValue(HiRes, 0) : Zero64Val,
+ CurDAG->getTargetConstant(64, DL, MVT::i32),
+ CurDAG->getTargetConstant(32, DL, MVT::i32),
+ SDValue(Res, 0)};
+
+ Res = CurDAG->getMachineNode(Mips::DINSU, DL, MVT::i64, Ops);
+ } else if (HiResNonZero) {
+ Res = CurDAG->getMachineNode(
+ Mips::DSLL32, DL, MVT::i64, SDValue(HiRes, 0),
+ CurDAG->getTargetConstant(0, DL, MVT::i32));
+ } else
+ llvm_unreachable(
+ "Zero splat value handled by non-zero 64bit splat synthesis!");
+
+ Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0));
+ } else
+ llvm_unreachable("Unknown ABI in MipsISelDAGToDAG!");
+
+ } else
+ return false;
if (ResVecTy != ViaVecTy) {
// If LdiOp is writing to a different register class to ResVecTy, then
OpenPOWER on IntegriCloud