diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 28 |
1 files changed, 19 insertions, 9 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 9331e41..f94d1ea 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -131,12 +131,12 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, return TTI::TCC_Free; case Instruction::And: RunFree = true; // (for the rotate-and-mask instructions) - // Fallthrough... + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::Or: case Instruction::Xor: ShiftedFree = true; - // Fallthrough... + LLVM_FALLTHROUGH; case Instruction::Sub: case Instruction::Mul: case Instruction::Shl: @@ -147,7 +147,8 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, case Instruction::ICmp: UnsignedFree = true; ImmIdx = 1; - // Fallthrough... (zero comparisons can use record-form instructions) + // Zero comparisons can use record-form instructions. + LLVM_FALLTHROUGH; case Instruction::Select: ZeroFree = true; break; @@ -280,7 +281,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { int PPCTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo) { + TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. @@ -359,11 +360,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); - // Aligned loads and stores are easy. - unsigned SrcBytes = LT.second.getStoreSize(); - if (!SrcBytes || !Alignment || Alignment >= SrcBytes) - return Cost; - bool IsAltivecType = ST->hasAltivec() && (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 || LT.second == MVT::v4i32 || LT.second == MVT::v4f32); @@ -372,6 +368,20 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, bool IsQPXType = ST->hasQPX() && (LT.second == MVT::v4f64 || LT.second == MVT::v4f32); + // VSX has 32b/64b load instructions. Legalization can handle loading of + // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and + // PPCTargetLowering can't compute the cost appropriately. So here we + // explicitly check this case. + unsigned MemBytes = Src->getPrimitiveSizeInBits(); + if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType && + (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32))) + return 1; + + // Aligned loads and stores are easy. + unsigned SrcBytes = LT.second.getStoreSize(); + if (!SrcBytes || !Alignment || Alignment >= SrcBytes) + return Cost; + // If we can use the permutation-based load sequence, then this is also // relatively cheap (not counting loop-invariant instructions): one load plus // one permute (the last load in a series has extra cost, but we're |