summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp28
1 files changed, 19 insertions, 9 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 9331e41..f94d1ea 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -131,12 +131,12 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return TTI::TCC_Free;
case Instruction::And:
RunFree = true; // (for the rotate-and-mask instructions)
- // Fallthrough...
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::Or:
case Instruction::Xor:
ShiftedFree = true;
- // Fallthrough...
+ LLVM_FALLTHROUGH;
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
@@ -147,7 +147,8 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
case Instruction::ICmp:
UnsignedFree = true;
ImmIdx = 1;
- // Fallthrough... (zero comparisons can use record-form instructions)
+ // Zero comparisons can use record-form instructions.
+ LLVM_FALLTHROUGH;
case Instruction::Select:
ZeroFree = true;
break;
@@ -280,7 +281,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
- TTI::OperandValueProperties Opd2PropInfo) {
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
@@ -359,11 +360,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
- // Aligned loads and stores are easy.
- unsigned SrcBytes = LT.second.getStoreSize();
- if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
- return Cost;
-
bool IsAltivecType = ST->hasAltivec() &&
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
@@ -372,6 +368,20 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
bool IsQPXType = ST->hasQPX() &&
(LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
+ // VSX has 32b/64b load instructions. Legalization can handle loading of
+ // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
+ // PPCTargetLowering can't compute the cost appropriately. So here we
+ // explicitly check this case.
+ unsigned MemBytes = Src->getPrimitiveSizeInBits();
+ if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
+ (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
+ return 1;
+
+ // Aligned loads and stores are easy.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+ return Cost;
+
// If we can use the permutation-based load sequence, then this is also
// relatively cheap (not counting loop-invariant instructions): one load plus
// one permute (the last load in a series has extra cost, but we're
OpenPOWER on IntegriCloud