summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp')
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp222
1 files changed, 161 insertions, 61 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 0e9b2da..1e51c1f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -215,7 +215,7 @@ namespace {
void InsertVRSaveCode(MachineFunction &MF);
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
@@ -334,12 +334,12 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
}
} else {
GlobalBaseReg =
- RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+ RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
}
} else {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
@@ -633,6 +633,13 @@ static unsigned getInt64CountDirect(int64_t Imm) {
// If no shift, we're done.
if (!Shift) return Result;
+ // If Hi word == Lo word,
+ // we can use rldimi to insert the Lo word into Hi word.
+ if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
+ ++Result;
+ return Result;
+ }
+
// Shift for next step if the upper 32-bits were not zero.
if (Imm)
++Result;
@@ -731,6 +738,14 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl,
// If no shift, we're done.
if (!Shift) return Result;
+ // If Hi word == Lo word,
+ // we can use rldimi to insert the Lo word into Hi word.
+ if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
+ SDValue Ops[] =
+ { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
+ return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ }
+
// Shift for next step if the upper 32-bits were not zero.
if (Imm) {
Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
@@ -912,84 +927,95 @@ class BitPermutationSelector {
}
};
- // Return true if something interesting was deduced, return false if we're
+ using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
+ using ValueBitsMemoizer =
+ DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
+ ValueBitsMemoizer Memoizer;
+
+ // Return a pair of bool and a SmallVector pointer to a memoization entry.
+ // The bool is true if something interesting was deduced, otherwise if we're
// providing only a generic representation of V (or something else likewise
- // uninteresting for instruction selection).
- bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) {
+ // uninteresting for instruction selection) through the SmallVector.
+ std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
+ unsigned NumBits) {
+ auto &ValueEntry = Memoizer[V];
+ if (ValueEntry)
+ return std::make_pair(ValueEntry->first, &ValueEntry->second);
+ ValueEntry.reset(new ValueBitsMemoizedValue());
+ bool &Interesting = ValueEntry->first;
+ SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
+ Bits.resize(NumBits);
+
switch (V.getOpcode()) {
default: break;
case ISD::ROTL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned RotAmt = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
- for (unsigned i = 0; i < Bits.size(); ++i)
- Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt];
+ for (unsigned i = 0; i < NumBits; ++i)
+ Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::SHL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
- for (unsigned i = ShiftAmt; i < Bits.size(); ++i)
+ for (unsigned i = ShiftAmt; i < NumBits; ++i)
Bits[i] = LHSBits[i - ShiftAmt];
for (unsigned i = 0; i < ShiftAmt; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::SRL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
- for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i)
+ for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
Bits[i] = LHSBits[i + ShiftAmt];
- for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i)
+ for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::AND:
if (isa<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- bool LHSTrivial = getValueBits(V.getOperand(0), LHSBits);
+ const SmallVector<ValueBit, 64> *LHSBits;
+ // Mark this as interesting, only if the LHS was also interesting. This
+ // prevents the overall procedure from matching a single immediate 'and'
+ // (which is non-optimal because such an and might be folded with other
+ // things if we don't select it here).
+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
- for (unsigned i = 0; i < Bits.size(); ++i)
+ for (unsigned i = 0; i < NumBits; ++i)
if (((Mask >> i) & 1) == 1)
- Bits[i] = LHSBits[i];
+ Bits[i] = (*LHSBits)[i];
else
Bits[i] = ValueBit(ValueBit::ConstZero);
- // Mark this as interesting, only if the LHS was also interesting. This
- // prevents the overall procedure from matching a single immediate 'and'
- // (which is non-optimal because such an and might be folded with other
- // things if we don't select it here).
- return LHSTrivial;
+ return std::make_pair(Interesting, &Bits);
}
break;
case ISD::OR: {
- SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
- getValueBits(V.getOperand(1), RHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
+ const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
bool AllDisjoint = true;
- for (unsigned i = 0; i < Bits.size(); ++i)
+ for (unsigned i = 0; i < NumBits; ++i)
if (LHSBits[i].isZero())
Bits[i] = RHSBits[i];
else if (RHSBits[i].isZero())
@@ -1002,14 +1028,14 @@ class BitPermutationSelector {
if (!AllDisjoint)
break;
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
}
- for (unsigned i = 0; i < Bits.size(); ++i)
+ for (unsigned i = 0; i < NumBits; ++i)
Bits[i] = ValueBit(V, i);
- return false;
+ return std::make_pair(Interesting = false, &Bits);
}
// For each value (except the constant ones), compute the left-rotate amount
@@ -1648,9 +1674,12 @@ class BitPermutationSelector {
unsigned NumRLInsts = 0;
bool FirstBG = true;
+ bool MoreBG = false;
for (auto &BG : BitGroups) {
- if (!MatchingBG(BG))
+ if (!MatchingBG(BG)) {
+ MoreBG = true;
continue;
+ }
NumRLInsts +=
SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
!FirstBG);
@@ -1668,7 +1697,10 @@ class BitPermutationSelector {
// because that exposes more opportunities for CSE.
if (NumAndInsts > NumRLInsts)
continue;
- if (Use32BitInsts && NumAndInsts == NumRLInsts)
+ // When merging multiple bit groups, instruction or is used.
+ // But when rotate is used, rldimi can inert the rotated value into any
+ // register, so instruction or can be avoided.
+ if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
continue;
DEBUG(dbgs() << "\t\t\t\tusing masking\n");
@@ -1886,8 +1918,7 @@ class BitPermutationSelector {
}
void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
- BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F),
- BitGroups.end());
+ BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
}
SmallVector<ValueBit, 64> Bits;
@@ -1910,9 +1941,12 @@ public:
// rotate-and-shift/shift/and/or instructions, using a set of heuristics
// known to produce optimial code for common cases (like i32 byte swapping).
SDNode *Select(SDNode *N) {
- Bits.resize(N->getValueType(0).getSizeInBits());
- if (!getValueBits(SDValue(N, 0), Bits))
+ Memoizer.clear();
+ auto Result =
+ getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
+ if (!Result.first)
return nullptr;
+ Bits = std::move(*Result.second);
DEBUG(dbgs() << "Considering bit-permutation-based instruction"
" selection for: ");
@@ -2623,6 +2657,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
MB = 64 - countTrailingOnes(Imm64);
SH = 0;
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ auto Op0 = Val.getOperand(0);
+ if ( Op0.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+ auto ResultType = Val.getNode()->getValueType(0);
+ auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
+ ResultType);
+ SDValue IDVal (ImDef, 0);
+
+ Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
+ ResultType, IDVal, Op0.getOperand(0),
+ getI32Imm(1, dl)), 0);
+ SH = 64 - Imm;
+ }
+ }
+
// If the operand is a logical right shift, we can fold it into this
// instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
// for n <= mb. The right shift is really a left rotate followed by a
@@ -3187,7 +3238,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
isa<ConstantSDNode>(Op0.getOperand(1))) {
- unsigned Bits = Op0.getValueType().getSizeInBits();
+ unsigned Bits = Op0.getValueSizeInBits();
if (b != Bits/8-1)
return false;
if (Op0.getConstantOperandVal(1) != Bits-8)
@@ -3215,9 +3266,9 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
// Now we need to make sure that the upper bytes are known to be
// zero.
- unsigned Bits = Op0.getValueType().getSizeInBits();
- if (!CurDAG->MaskedValueIsZero(Op0,
- APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
+ unsigned Bits = Op0.getValueSizeInBits();
+ if (!CurDAG->MaskedValueIsZero(
+ Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
return false;
LHS = Op0.getOperand(0);
@@ -3250,7 +3301,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
} else if (Op.getOpcode() == ISD::SRL) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return false;
- unsigned Bits = Op.getValueType().getSizeInBits();
+ unsigned Bits = Op.getValueSizeInBits();
if (b != Bits/8-1)
return false;
if (Op.getConstantOperandVal(1) != Bits-8)
@@ -3562,7 +3613,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
Op.getOperand(0) == Op.getOperand(1))
Op2Not = true;
}
- } // fallthrough
+ LLVM_FALLTHROUGH;
+ }
case PPC::BC:
case PPC::BCn:
case PPC::SELECT_I4:
@@ -3989,8 +4041,9 @@ static bool PeepholePPC64ZExtGather(SDValue Op32,
return true;
}
- // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
- if (Op32.getMachineOpcode() == PPC::CNTLZW) {
+ // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
+ if (Op32.getMachineOpcode() == PPC::CNTLZW ||
+ Op32.getMachineOpcode() == PPC::CNTTZW) {
ToPromote.insert(Op32.getNode());
return true;
}
@@ -4185,6 +4238,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
+ case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
case PPC::OR: NewOpcode = PPC::OR8; break;
case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
@@ -4312,13 +4366,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (!Base.isMachineOpcode())
continue;
- // On targets with fusion, we don't want this to fire and remove a fusion
- // opportunity, unless a) it results in another fusion opportunity or
- // b) optimizing for size.
- if (PPCSubTarget->hasFusion() &&
- (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
- continue;
-
unsigned Flags = 0;
bool ReplaceFlags = true;
@@ -4363,15 +4410,64 @@ void PPCDAGToDAGISel::PeepholePPC64() {
}
SDValue ImmOpnd = Base.getOperand(1);
- int MaxDisplacement = 0;
+
+ // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
+ // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
+ // we might have needed different @ha relocation values for the offset
+ // pointers).
+ int MaxDisplacement = 7;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
const GlobalValue *GV = GA->getGlobal();
- MaxDisplacement = GV->getAlignment() - 1;
+ MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
}
+ bool UpdateHBase = false;
+ SDValue HBase = Base.getOperand(0);
+
int Offset = N->getConstantOperandVal(FirstOp);
- if (Offset < 0 || Offset > MaxDisplacement)
- continue;
+ if (ReplaceFlags) {
+ if (Offset < 0 || Offset > MaxDisplacement) {
+ // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
+ // one use, then we can do this for any offset, we just need to also
+ // update the offset (i.e. the symbol addend) on the addis also.
+ if (Base.getMachineOpcode() != PPC::ADDItocL)
+ continue;
+
+ if (!HBase.isMachineOpcode() ||
+ HBase.getMachineOpcode() != PPC::ADDIStocHA)
+ continue;
+
+ if (!Base.hasOneUse() || !HBase.hasOneUse())
+ continue;
+
+ SDValue HImmOpnd = HBase.getOperand(1);
+ if (HImmOpnd != ImmOpnd)
+ continue;
+
+ UpdateHBase = true;
+ }
+ } else {
+ // If we're directly folding the addend from an addi instruction, then:
+ // 1. In general, the offset on the memory access must be zero.
+ // 2. If the addend is a constant, then it can be combined with a
+ // non-zero offset, but only if the result meets the encoding
+ // requirements.
+ if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
+ Offset += C->getSExtValue();
+
+ if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) && (Offset % 4) != 0)
+ continue;
+
+ if (!isInt<16>(Offset))
+ continue;
+
+ ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
+ ImmOpnd.getValueType());
+ } else if (Offset != 0) {
+ continue;
+ }
+ }
// We found an opportunity. Reverse the operands from the add
// immediate and substitute them into the load or store. If
@@ -4414,6 +4510,10 @@ void PPCDAGToDAGISel::PeepholePPC64() {
(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
N->getOperand(2));
+ if (UpdateHBase)
+ (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
+ ImmOpnd);
+
// The add-immediate may now be dead, in which case remove it.
if (Base.getNode()->use_empty())
CurDAG->RemoveDeadNode(Base.getNode());
OpenPOWER on IntegriCloud