diff options
Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td')
-rw-r--r-- | contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td | 584 |
1 files changed, 301 insertions, 283 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td index ad81287..804a547 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -1,13 +1,14 @@ +//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + // Pattern fragment that combines the value type and the register class // into a single parameter. -// The pat frags in the definitions below need to have a named register, -// otherwise i32 will be assumed regardless of the register class. The -// name of the register does not matter. -def I1 : PatLeaf<(i1 PredRegs:$R)>; -def I32 : PatLeaf<(i32 IntRegs:$R)>; -def I64 : PatLeaf<(i64 DoubleRegs:$R)>; -def F32 : PatLeaf<(f32 IntRegs:$R)>; -def F64 : PatLeaf<(f64 DoubleRegs:$R)>; // Pattern fragments to extract the low and high subregisters from a // 64-bit value. @@ -17,6 +18,16 @@ def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; def IsOrAdd: PatFrag<(ops node:$Addr, node:$off), (or node:$Addr, node:$off), [{ return isOrEquivalentToAdd(N); }]>; +def Iss4_6 : PatLeaf<(i32 imm), [{ + int32_t V = N->getSExtValue(); + return isShiftedInt<4,6>(V); +}]>; + +def Iss4_7 : PatLeaf<(i32 imm), [{ + int32_t V = N->getSExtValue(); + return isShiftedInt<4,7>(V); +}]>; + def IsPow2_32 : PatLeaf<(i32 imm), [{ uint32_t V = N->getZExtValue(); return isPowerOf2_32(V); @@ -89,6 +100,11 @@ def LogN2_64 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32); }]>; +def ToZext64: OutPatFrag<(ops node:$Rs), + (i64 (A4_combineir 0, (i32 $Rs)))>; +def ToSext64: OutPatFrag<(ops node:$Rs), + (i64 (A2_sxtw (i32 $Rs)))>; + class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred> : Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)), @@ -153,8 +169,12 @@ def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs), def: Pat<(not I32:$src1), (A2_subri -1, IntRegs:$src1)>; +def TruncI64ToI32: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); +}]>; + def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>; -def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>; +def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>; def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs), (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; @@ -274,7 +294,7 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; -def: Pat<(br bb:$dst), (J2_jump brtarget:$dst)>; +def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>; def: Pat<(brcond I1:$src1, bb:$block), (J2_jumpt PredRegs:$src1, bb:$block)>; def: Pat<(brind I32:$dst), (J2_jumpr IntRegs:$dst)>; @@ -334,7 +354,7 @@ def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; -def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), +def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1), (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; @@ -371,48 +391,47 @@ def: T_MType_acc_pat3 <M4_or_andn, and, or>; def: T_MType_acc_pat3 <M4_and_andn, and, and>; def: T_MType_acc_pat3 <M4_xor_andn, and, xor>; +// This complex pattern is really only to detect various forms of +// sign-extension i32->i64. The selected value will be of type i64 +// whose low word is the value being extended. The high word is +// unspecified. +def Usxtw : ComplexPattern<i64, 1, "DetectUseSxtw", [], []>; + def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; -def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>; def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; +def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; -// Return true if for a 32 to 64-bit sign-extended load. -def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{ - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD) - return false; - return LD->getExtensionType() == ISD::SEXTLOAD && - LD->getMemoryVT().getScalarType() == MVT::i32; -}]>; - -def: Pat<(mul (Aext64 I32:$src1), (Aext64 I32:$src2)), - (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(i32 (trunc (sra (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(i32 (trunc (srl (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(mul (Sext64 I32:$src1), (Sext64 I32:$src2)), - (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), + (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; -def: Pat<(mul Sext64Ld:$src1, Sext64Ld:$src2), - (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; +def: Pat<(mul Sext64:$Rs, Sext64:$Rt), + (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; // Multiply and accumulate, use full result. // Rxx[+-]=mpy(Rs,Rt) -def: Pat<(add I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))), - (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(sub I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))), - (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(add I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(add I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(sub I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(sub I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI> @@ -534,7 +553,8 @@ def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>; def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>; def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>; -def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>; +def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; +def: Pat <(i64 (sext_inreg I64:$src, i32)), (A2_sxtw (LoReg I64:$src))>; def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src), (A2_abs IntRegs:$src)>; @@ -668,6 +688,8 @@ def I32toI1: OutPatFrag<(ops node:$Rs), defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>; def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>; +def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), + (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>; def: Pat<(sra I64:$src, u6_0ImmPred:$u6), (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; def: Pat<(srl I64:$src, u6_0ImmPred:$u6), @@ -695,15 +717,16 @@ def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; // Map TLS addressses to A2_tfrsi. -def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>; -def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>; +def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s32_0Imm:$addr)>; +def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s32_0Imm:$label)>; def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; def: Pat<(i1 0), (PS_false)>; def: Pat<(i1 1), (PS_true)>; // Pseudo instructions. -def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -721,8 +744,8 @@ def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def: Pat<(callseq_start timm:$amt), - (ADJCALLSTACKDOWN imm:$amt)>; +def: Pat<(callseq_start timm:$amt, timm:$amt2), + (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>; def: Pat<(callseq_end timm:$amt1, timm:$amt2), (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; @@ -779,27 +802,19 @@ def: Pat<(i64 (sext_inreg I64:$src1, i16)), def: Pat<(i64 (sext_inreg I64:$src1, i8)), (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; -// We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a J2_jumpf. -def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2), - bb:$offset)>; - -def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset), + (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset), + (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$Pu, bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$Pu, bb:$offset)>; // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)), - bb:$offset)>; +def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>; + // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. @@ -853,15 +868,13 @@ def: Pat<(i1 (setne I1:$src1, I1:$src2)), def: Pat<(i1 (setne I64:$src1, I64:$src2)), (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; -// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setge I32:$src1, I32:$src2)), - (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>; +// rs >= rt -> rt <= rs +def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), + (C4_cmplte I32:$Rt, I32:$Rs)>; -// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) let AddedComplexity = 30 in -def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)), - (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>; +def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), + (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -898,26 +911,35 @@ def: Pat<(i1 (setule I64:$src1, I64:$src2)), (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; // Sign extends. -// i1 -> i32 -def: Pat<(i32 (sext I1:$src1)), - (C2_muxii PredRegs:$src1, -1, 0)>; +// sext i1->i32 +def: Pat<(i32 (sext I1:$Pu)), + (C2_muxii I1:$Pu, -1, 0)>; -// i1 -> i64 -def: Pat<(i64 (sext I1:$src1)), - (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>; +// sext i1->i64 +def: Pat<(i64 (sext I1:$Pu)), + (A2_combinew (C2_muxii PredRegs:$Pu, -1, 0), + (C2_muxii PredRegs:$Pu, -1, 0))>; // Zero extends. -// i1 -> i32 -def: Pat<(i32 (zext I1:$src1)), - (C2_muxii PredRegs:$src1, 1, 0)>; +// zext i1->i32 +def: Pat<(i32 (zext I1:$Pu)), + (C2_muxii PredRegs:$Pu, 1, 0)>; + +// zext i1->i64 +def: Pat<(i64 (zext I1:$Pu)), + (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>; + +// zext i32->i64 +def: Pat<(Zext64 I32:$Rs), + (ToZext64 IntRegs:$Rs)>; // Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def: Pat<(i32 (anyext I1:$src1)), - (C2_muxii PredRegs:$src1, 1, 0)>; +def: Pat<(i32 (anyext I1:$Pu)), + (C2_muxii PredRegs:$Pu, 1, 0)>; -// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) -def: Pat<(i64 (anyext I1:$src1)), - (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>; +// Map from Rss = Pd to Rdd = combine(#0, (mux(Pd, #1, #0))) +def: Pat<(i64 (anyext I1:$Pu)), + (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>; // Clear the sign bit in a 64-bit register. def ClearSign : OutPatFrag<(ops node:$Rss), @@ -1138,8 +1160,8 @@ multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> { defm: T_MinMax_pats<Op, I64, Inst, SwapInst>; } -def: Pat<(add (Sext64 I32:$Rs), I64:$Rt), - (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(add Sext64:$Rs, I64:$Rt), + (A2_addsp (LoReg Sext64:$Rs), DoubleRegs:$Rt)>; let AddedComplexity = 200 in { defm: MinMax_pats_p<setge, A2_maxp, A2_minp>; @@ -1244,11 +1266,6 @@ def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8), } -def ToZext64: OutPatFrag<(ops node:$Rs), - (i64 (A4_combineir 0, (i32 $Rs)))>; -def ToSext64: OutPatFrag<(ops node:$Rs), - (i64 (A2_sxtw (i32 $Rs)))>; - // Patterns to generate indexed loads with different forms of the address: // - frameindex, // - base + offset, @@ -1349,14 +1366,6 @@ let AddedComplexity = 20 in { def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>; } -// zext i1->i64 -def: Pat<(i64 (zext I1:$src1)), - (ToZext64 (C2_muxii PredRegs:$src1, 1, 0))>; - -// zext i32->i64 -def: Pat<(Zext64 I32:$src1), - (ToZext64 IntRegs:$src1)>; - let AddedComplexity = 40 in multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT, PatFrag stOp> { @@ -1466,16 +1475,22 @@ def i32in8ImmPred: PatLeaf<(i32 imm), [{ return v == (int64_t)(int8_t)v; }]>; +class SmallStackStore<PatFrag Store> + : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ + return isSmallStackStore(cast<StoreSDNode>(N)); +}]>; let AddedComplexity = 40 in { // Even though the offset is not extendable in the store-immediate, we // can still generate the fi# in the base address. If the final offset // is not valid for the instruction, we will replace it with a scratch // register. -// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>; -// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf, -// S4_storeirh_io>; -// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>; + def: Storexm_fi_pat <SmallStackStore<truncstorei8>, s32_0ImmPred, + ToImmByte, S4_storeirb_io>; + def: Storexm_fi_pat <SmallStackStore<truncstorei16>, i16in8ImmPred, + ToImmHalf, S4_storeirh_io>; + def: Storexm_fi_pat <SmallStackStore<store>, i32in8ImmPred, + ToImmWord, S4_storeiri_io>; // defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte, // S4_storeirb_io>; @@ -1587,6 +1602,15 @@ def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; +def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>; +def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; + +def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; +def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; + +def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; +def: Pat<(bswap I64:$Rss), (A2_combinew (A2_swiz (LoReg $Rss)), + (A2_swiz (HiReg $Rss)))>; let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), @@ -1622,9 +1646,14 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>; def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; - +def: Pat<(add (mul I32:$Rs, I32:$Rt), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>; def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), @@ -2117,6 +2146,11 @@ let AddedComplexity = 30 in { def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>; def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>; def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>; + def: Storea_pat<store, I64, u32_0ImmPred, PS_storerdabs>; + + def: Stoream_pat<truncstorei8, I64, u32_0ImmPred, LoReg, PS_storerbabs>; + def: Stoream_pat<truncstorei16, I64, u32_0ImmPred, LoReg, PS_storerhabs>; + def: Stoream_pat<truncstorei32, I64, u32_0ImmPred, LoReg, PS_storeriabs>; } let AddedComplexity = 30 in { @@ -2125,6 +2159,19 @@ let AddedComplexity = 30 in { def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>; def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>; def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>; + def: Loada_pat<load, i64, u32_0ImmPred, PS_loadrdabs>; + + def: Loadam_pat<extloadi8, i64, u32_0ImmPred, ToZext64, PS_loadrubabs>; + def: Loadam_pat<sextloadi8, i64, u32_0ImmPred, ToSext64, PS_loadrbabs>; + def: Loadam_pat<zextloadi8, i64, u32_0ImmPred, ToZext64, PS_loadrubabs>; + + def: Loadam_pat<extloadi16, i64, u32_0ImmPred, ToZext64, PS_loadruhabs>; + def: Loadam_pat<sextloadi16, i64, u32_0ImmPred, ToSext64, PS_loadrhabs>; + def: Loadam_pat<zextloadi16, i64, u32_0ImmPred, ToZext64, PS_loadruhabs>; + + def: Loadam_pat<extloadi32, i64, u32_0ImmPred, ToZext64, PS_loadriabs>; + def: Loadam_pat<sextloadi32, i64, u32_0ImmPred, ToSext64, PS_loadriabs>; + def: Loadam_pat<zextloadi32, i64, u32_0ImmPred, ToZext64, PS_loadriabs>; } // Indexed store word - global address. @@ -2203,6 +2250,12 @@ def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>; def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>; def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>; +// Prefer this pattern to S2_asl_i_p_or for the special case of joining +// two 32-bit words into a 64-bit word. +let AddedComplexity = 200 in +def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), + (A2_combinew I32:$a, I32:$b)>; + def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)), (i64 (zext (i32 (and I32:$a, (i32 65535)))))), (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))), @@ -2235,12 +2288,6 @@ def ftoi : SDNodeXForm<fpimm, [{ def: Pat<(sra (i64 (add (sra I64:$src1, u6_0ImmPred:$src2), 1)), (i32 1)), (S2_asr_i_p_rnd I64:$src1, imm:$src2)>; -def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i64>]>; -def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; - -def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>; - let AddedComplexity = 20 in { defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>; defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>; @@ -2701,6 +2748,15 @@ def: Pat<(fneg F64:$Rs), (S2_togglebit_i (HiReg $Rs), 31), isub_hi, (i32 (LoReg $Rs)), isub_lo)>; +def: Pat<(mul I64:$Rss, I64:$Rtt), + (A2_combinew + (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), + (LoReg $Rss), + (HiReg $Rtt)), + (LoReg $Rtt), + (HiReg $Rss)), + (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>; + def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ return isAlignedMemNode(dyn_cast<MemSDNode>(N)); }]>; @@ -2718,19 +2774,11 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [ }]>; -def s4_6ImmPred: PatLeaf<(i32 imm), [{ - int64_t V = N->getSExtValue(); - return isShiftedInt<4,6>(V); -}]>; - -def s4_7ImmPred: PatLeaf<(i32 imm), [{ - int64_t V = N->getSExtValue(); - return isShiftedInt<4,7>(V); -}]>; - - multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Aligned stores + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2739,6 +2787,9 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // 128B Aligned stores + def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, Requires<[UseHVXDbl]>; @@ -2748,26 +2799,36 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Fold Add R+OFF into vector store. let AddedComplexity = 10 in { + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, Iss4_6:$offset)), + (V6_vS32b_nt_ai IntRegs:$src2, Iss4_6:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), - (add IntRegs:$src2, s4_6ImmPred:$offset)), - (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset, + (add IntRegs:$src2, Iss4_6:$offset)), + (V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset, (VTSgl VectorRegs:$src1))>, Requires<[UseHVXSgl]>; def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), - (add IntRegs:$src2, s4_6ImmPred:$offset)), - (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset, + (add IntRegs:$src2, Iss4_6:$offset)), + (V6_vS32Ub_ai IntRegs:$src2, Iss4_6:$offset, (VTSgl VectorRegs:$src1))>, Requires<[UseHVXSgl]>; // Fold Add R+OFF into vector store 128B. + def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, Iss4_7:$offset)), + (V6_vS32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), - (add IntRegs:$src2, s4_7ImmPred:$offset)), - (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (add IntRegs:$src2, Iss4_7:$offset)), + (V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset, (VTDbl VectorRegs128B:$src1))>, Requires<[UseHVXDbl]>; def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), - (add IntRegs:$src2, s4_7ImmPred:$offset)), - (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset, + (add IntRegs:$src2, Iss4_7:$offset)), + (V6_vS32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset, (VTDbl VectorRegs128B:$src1))>, Requires<[UseHVXDbl]>; } @@ -2781,6 +2842,9 @@ defm : vS32b_ai_pats <v8i64, v16i64>; multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Aligned loads + def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; def : Pat < (VTSgl (alignedload IntRegs:$addr)), (V6_vL32b_ai IntRegs:$addr, 0) >, Requires<[UseHVXSgl]>; @@ -2789,6 +2853,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // 128B Load + def : Pat < (VTDbl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; def : Pat < (VTDbl (alignedload IntRegs:$addr)), (V6_vL32b_ai_128B IntRegs:$addr, 0) >, Requires<[UseHVXDbl]>; @@ -2798,18 +2865,24 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Fold Add R+OFF into vector load. let AddedComplexity = 10 in { - def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), - (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + def : Pat<(VTDbl (alignednontemporalload (add IntRegs:$src2, Iss4_7:$offset))), + (V6_vL32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset)>, + Requires<[UseHVXDbl]>; + def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))), + (V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; - def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))), - (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>, + def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, Iss4_7:$offset))), + (V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; - def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), - (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, Iss4_6:$offset))), + (V6_vL32b_nt_ai IntRegs:$src2, Iss4_6:$offset)>, + Requires<[UseHVXSgl]>; + def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))), + (V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>, Requires<[UseHVXSgl]>; - def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))), - (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>, + def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, Iss4_6:$offset))), + (V6_vL32Ub_ai IntRegs:$src2, Iss4_6:$offset)>, Requires<[UseHVXSgl]>; } } @@ -2820,6 +2893,9 @@ defm : vL32b_ai_pats <v16i32, v32i32>; defm : vL32b_ai_pats <v8i64, v16i64>; multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(alignednontemporalstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2827,6 +2903,10 @@ multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; + def : Pat<(alignednontemporalstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), (PS_vstorerw_ai_128B IntRegs:$addr, 0, (VTDbl VecDblRegs128B:$src1))>, @@ -2843,6 +2923,9 @@ defm : STrivv_pats <v32i32, v64i32>; defm : STrivv_pats <v16i64, v32i64>; multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(VTSgl (alignednontemporalload I32:$addr)), + (PS_vloadrw_nt_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; def : Pat<(VTSgl (alignedload I32:$addr)), (PS_vloadrw_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; @@ -2850,6 +2933,9 @@ multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { (PS_vloadrwu_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; + def : Pat<(VTDbl (alignednontemporalload I32:$addr)), + (PS_vloadrw_nt_ai_128B I32:$addr, 0)>, + Requires<[UseHVXDbl]>; def : Pat<(VTDbl (alignedload I32:$addr)), (PS_vloadrw_ai_128B I32:$addr, 0)>, Requires<[UseHVXDbl]>; @@ -2891,45 +2977,40 @@ def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, Requires<[UseHVXDbl]>; -def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, - SDTCisInt<3>]>; - -def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>; - -// 0 as the last argument denotes vpacke. 1 denotes vpacko -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 0))), - (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs), - (v64i8 VectorRegs:$Vt), (i32 1))), - (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 0))), - (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; -def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs), - (v32i16 VectorRegs:$Vt), (i32 1))), - (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>, - Requires<[UseHVXSgl]>; - -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs), - (v128i8 VecDblRegs:$Vt), (i32 1))), - (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 0))), - (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; -def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs), - (v64i16 VecDblRegs:$Vt), (i32 1))), - (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, - Requires<[UseHVXDbl]>; +def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; + +def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; +def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; + +let Predicates = [UseHVXSgl] in { + def: Pat<(v64i8 (HexagonVPACKE (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt))), + (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v64i8 (HexagonVPACKO (v64i8 VectorRegs:$Vs), + (v64i8 VectorRegs:$Vt))), + (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v32i16 (HexagonVPACKE (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt))), + (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>; + def: Pat<(v32i16 (HexagonVPACKO (v32i16 VectorRegs:$Vs), + (v32i16 VectorRegs:$Vt))), + (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>; +} + +let Predicates = [UseHVXDbl] in { + def: Pat<(v128i8 (HexagonVPACKE (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt))), + (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v128i8 (HexagonVPACKO (v128i8 VecDblRegs:$Vs), + (v128i8 VecDblRegs:$Vt))), + (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v64i16 (HexagonVPACKE (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt))), + (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; + def: Pat<(v64i16 (HexagonVPACKO (v64i16 VecDblRegs:$Vs), + (v64i16 VecDblRegs:$Vt))), + (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>; +} def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; @@ -2982,16 +3063,20 @@ def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; -def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; -def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; +def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; // Replicate the low 8-bits from 32-bits input register into each of the // four bytes of 32-bits destination register. -def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; +def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; // Replicate the low 16-bits from 32-bits input register into each of the // four halfwords of 64-bits destination register. -def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; +def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; + +def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), + (A2_combineii imm:$s8, imm:$s8)>; +def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (A2_combinew I32:$Rs, I32:$Rs)>; class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> @@ -3019,94 +3104,51 @@ def: VArith_pat <A2_xorp, xor, V8I8>; def: VArith_pat <A2_xorp, xor, V4I16>; def: VArith_pat <A2_xorp, xor, V2I32>; -def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_asr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_lsr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), - (i32 u5_0ImmPred:$c))))), +def: Pat<(v2i32 (shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), (S2_asl_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asl_i_vh V4I16:$b, imm:$c)>; -def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; -def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; +def SDTHexagonVShift + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; -def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; +def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; +def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; -def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)), +def: Pat<(v2i32 (HexagonVASL V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVASL V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVASR V2I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)), +def: Pat<(v4i16 (HexagonVASR V4I16:$Rs, u4_0ImmPred:$u4)), (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)), +def: Pat<(v2i32 (HexagonVLSR V2I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)), +def: Pat<(v4i16 (HexagonVLSR V4I16:$Rs, u4_0ImmPred:$u4)), (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value> : Pat <(Op Value:$Rs, I32:$Rt), (MI Value:$Rs, I32:$Rt)>; -def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; -def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>; -def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; -def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; -def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; -def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; - - -def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; -def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; -def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; - -def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; - - -class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> - : Pat <(i1 (Op Value:$Rs, Value:$Rt)), - (MI Value:$Rs, Value:$Rt)>; - -def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; -def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; -def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; - -def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; -def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; -def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; - -def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; -def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; -def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; +def: vshift_rr_pat <S2_asl_r_vw, HexagonVASL, V2I32>; +def: vshift_rr_pat <S2_asl_r_vh, HexagonVASL, V4I16>; +def: vshift_rr_pat <S2_asr_r_vw, HexagonVASR, V2I32>; +def: vshift_rr_pat <S2_asr_r_vh, HexagonVASR, V4I16>; +def: vshift_rr_pat <S2_lsr_r_vw, HexagonVLSR, V2I32>; +def: vshift_rr_pat <S2_lsr_r_vh, HexagonVLSR, V4I16>; class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy> @@ -3216,13 +3258,6 @@ def: Pat<(v4i8 (trunc V4I16:$Rs)), def: Pat<(v2i16 (trunc V2I32:$Rs)), (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - -def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; -def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; - -def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; -def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; - def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; @@ -3253,8 +3288,8 @@ def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), - (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)), - (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>; + (LoReg (S2_vtrunewh (A2_combineii 0, 0), + (vmpyh V2I16:$Rs, V2I16:$Rt)))>; // Multiplies two v4i16 vectors. def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), @@ -3283,31 +3318,6 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; -def SDTHexagonBinOp64 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; - -def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; -def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; -def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; -def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; - -class ShufflePat<InstHexagon MI, SDNode Op> - : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), - (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b -def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; - -// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b -def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; - -// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h -def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; - -// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h -def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; - - // Truncated store from v4i16 to v4i8. def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr), @@ -3345,3 +3355,11 @@ def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; + +// Read cycle counter. +// +def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, + [SDNPHasChain]>; + +def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; |