diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
53 files changed, 26277 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp new file mode 100644 index 0000000..b6a0835 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -0,0 +1,302 @@ +//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an PPC MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "PPCInstPrinter.h" +#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCPredicates.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "PPCGenAsmWriter.inc" + +StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + +void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + // Check for slwi/srwi mnemonics. 
+ if (MI->getOpcode() == PPC::RLWINM) { + unsigned char SH = MI->getOperand(2).getImm(); + unsigned char MB = MI->getOperand(3).getImm(); + unsigned char ME = MI->getOperand(4).getImm(); + bool useSubstituteMnemonic = false; + if (SH <= 31 && MB == 0 && ME == (31-SH)) { + O << "\tslwi "; useSubstituteMnemonic = true; + } + if (SH <= 31 && MB == (32-SH) && ME == 31) { + O << "\tsrwi "; useSubstituteMnemonic = true; + SH = 32-SH; + } + if (useSubstituteMnemonic) { + printOperand(MI, 0, O); + O << ", "; + printOperand(MI, 1, O); + O << ", " << (unsigned int)SH; + + printAnnotation(O, Annot); + return; + } + } + + if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + O << "\tmr "; + printOperand(MI, 0, O); + O << ", "; + printOperand(MI, 1, O); + printAnnotation(O, Annot); + return; + } + + if (MI->getOpcode() == PPC::RLDICR) { + unsigned char SH = MI->getOperand(2).getImm(); + unsigned char ME = MI->getOperand(3).getImm(); + // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH + if (63-SH == ME) { + O << "\tsldi "; + printOperand(MI, 0, O); + O << ", "; + printOperand(MI, 1, O); + O << ", " << (unsigned int)SH; + printAnnotation(O, Annot); + return; + } + } + + printInstruction(MI, O); + printAnnotation(O, Annot); +} + + +void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O, + const char *Modifier) { + assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!"); + unsigned Code = MI->getOperand(OpNo).getImm(); + if (StringRef(Modifier) == "cc") { + switch ((PPC::Predicate)Code) { + default: assert(0 && "Invalid predicate"); + case PPC::PRED_ALWAYS: return; // Don't print anything for always. 
+ case PPC::PRED_LT: O << "lt"; return; + case PPC::PRED_LE: O << "le"; return; + case PPC::PRED_EQ: O << "eq"; return; + case PPC::PRED_GE: O << "ge"; return; + case PPC::PRED_GT: O << "gt"; return; + case PPC::PRED_NE: O << "ne"; return; + case PPC::PRED_UN: O << "un"; return; + case PPC::PRED_NU: O << "nu"; return; + } + } + + assert(StringRef(Modifier) == "reg" && + "Need to specify 'cc' or 'reg' as predicate op modifier!"); + // Don't print the register for 'always'. + if (Code == PPC::PRED_ALWAYS) return; + printOperand(MI, OpNo+1, O); +} + +/// printS5ImmOperand - Print the low five bits of immediate operand OpNo as a +/// signed value in the range [-16, 15]. +void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + // Sign-extend in 'int' rather than in plain 'char': the signedness of char + // is implementation-defined, and where it is unsigned a negative result + // would be printed as 224..255. + int Value = (int)(MI->getOperand(OpNo).getImm() & 0x1f); + if (Value & 0x10) + Value -= 0x20; + O << Value; +} + +void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned char Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 31 && "Invalid u5imm argument!"); + O << (unsigned int)Value; +} + +void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned char Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 63 && "Invalid u6imm argument!"); + O << (unsigned int)Value; +} + +void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << (short)MI->getOperand(OpNo).getImm(); +} + +void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << (unsigned short)MI->getOperand(OpNo).getImm(); +} + +void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) + O << (short)(MI->getOperand(OpNo).getImm()*4); + else + printOperand(MI, OpNo, O); +} + +void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (!MI->getOperand(OpNo).isImm()) + return printOperand(MI, OpNo, O); + + // Branches can take an immediate operand. 
This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. + O << "$+"; + printAbsAddrOperand(MI, OpNo, O); +} + +void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << (int)MI->getOperand(OpNo).getImm()*4; +} + + +void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned CCReg = MI->getOperand(OpNo).getReg(); + unsigned RegNo; + switch (CCReg) { + default: assert(0 && "Unknown CR register"); + case PPC::CR0: RegNo = 0; break; + case PPC::CR1: RegNo = 1; break; + case PPC::CR2: RegNo = 2; break; + case PPC::CR3: RegNo = 3; break; + case PPC::CR4: RegNo = 4; break; + case PPC::CR5: RegNo = 5; break; + case PPC::CR6: RegNo = 6; break; + case PPC::CR7: RegNo = 7; break; + } + O << (0x80 >> RegNo); +} + +void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printSymbolLo(MI, OpNo, O); + O << '('; + if (MI->getOperand(OpNo+1).getReg() == PPC::R0) + O << "0"; + else + printOperand(MI, OpNo+1, O); + O << ')'; +} + +void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) + printS16X4ImmOperand(MI, OpNo, O); + else + printSymbolLo(MI, OpNo, O); + O << '('; + + if (MI->getOperand(OpNo+1).getReg() == PPC::R0) + O << "0"; + else + printOperand(MI, OpNo+1, O); + O << ')'; +} + + +void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + // When used as the base register, r0 reads constant zero rather than + // the value contained in the register. For this reason, the darwin + // assembler requires that we print r0 as 0 (no r) when used as the base. 
+ if (MI->getOperand(OpNo).getReg() == PPC::R0) + O << "0"; + else + printOperand(MI, OpNo, O); + O << ", "; + printOperand(MI, OpNo+1, O); +} + + + +/// stripRegisterPrefix - This method strips the character prefix from a +/// register name so that only the number is left. Used for linux asm. +/// Names starting with 'r', 'f' or 'v' lose one character and names starting +/// with "cr" lose two; any other name is returned unchanged. +static const char *stripRegisterPrefix(const char *RegName) { + switch (RegName[0]) { + case 'r': + case 'f': + case 'v': return RegName + 1; + case 'c': if (RegName[1] == 'r') return RegName + 2; + } + + return RegName; +} + +void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + const char *RegName = getRegisterName(Op.getReg()); + // The linux and AIX assemblers do not take register prefixes. + if (!isDarwinSyntax()) + RegName = stripRegisterPrefix(RegName); + + O << RegName; + return; + } + + if (Op.isImm()) { + O << Op.getImm(); + return; + } + + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << *Op.getExpr(); +} + +void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) + return printS16ImmOperand(MI, OpNo, O); + + // FIXME: This is a terrible hack because we can't encode lo16() as an operand + // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower. + if (MI->getOperand(OpNo).isExpr() && + isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) { + O << "lo16("; + printOperand(MI, OpNo, O); + O << ')'; + } else { + printOperand(MI, OpNo, O); + } +} + +void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) + return printS16ImmOperand(MI, OpNo, O); + + // FIXME: This is a terrible hack because we can't encode ha16() as an operand + // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower. 
+ if (MI->getOperand(OpNo).isExpr() && + isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) { + O << "ha16("; + printOperand(MI, OpNo, O); + O << ')'; + } else { + printOperand(MI, OpNo, O); + } +} + + diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h new file mode 100644 index 0000000..4ed4b76 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -0,0 +1,71 @@ +//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an PPC MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCINSTPRINTER_H +#define PPCINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class MCOperand; + +class PPCInstPrinter : public MCInstPrinter { + // 0 -> AIX, 1 -> Darwin. + unsigned SyntaxVariant; +public: + PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant) + : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {} + + bool isDarwinSyntax() const { + return SyntaxVariant == 1; + } + + virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + virtual StringRef getOpcodeName(unsigned Opcode) const; + + static const char *getInstructionName(unsigned Opcode); + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printPredicateOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O, const char *Modifier); + + + void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + // FIXME: Remove + void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp new file mode 100644 index 0000000..9f2fd6d --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -0,0 +1,191 @@ +//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmBackend.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + return Value; + case PPC::fixup_ppc_brcond14: + return Value & 0x3ffc; + case PPC::fixup_ppc_br24: + return Value & 0x3fffffc; +#if 0 + case PPC::fixup_ppc_hi16: + return (Value >> 16) & 0xffff; +#endif + case PPC::fixup_ppc_ha16: + return ((Value >> 16) + ((Value & 0x8000) ? 
1 : 0)) & 0xffff; + case PPC::fixup_ppc_lo16: + return Value & 0xffff; + } +} + +namespace { +class PPCMachObjectWriter : public MCMachObjectTargetWriter { +public: + PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, + uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) {} +}; + +class PPCELFObjectWriter : public MCELFObjectTargetWriter { +public: + PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine, + bool HasRelocationAddend, bool isLittleEndian) + : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {} +}; + +class PPCAsmBackend : public MCAsmBackend { +const Target &TheTarget; +public: + PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {} + + unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = { + // name offset bits flags + { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_lo16", 16, 16, 0 }, + { "fixup_ppc_ha16", 16, 16, 0 }, + { "fixup_ppc_lo14", 16, 14, 0 } + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + bool MayNeedRelaxation(const MCInst &Inst) const { + // FIXME. + return false; + } + + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { + // FIXME. + assert(0 && "RelaxInstruction() unimplemented"); + } + + bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + // FIXME: Zero fill for now. 
That's not right, but at least will get the + // section size right. + for (uint64_t i = 0; i != Count; ++i) + OW->Write8(0); + return true; + } + + unsigned getPointerSize() const { + StringRef Name = TheTarget.getName(); + if (Name == "ppc64") return 8; + assert(Name == "ppc32" && "Unknown target name!"); + return 4; + } +}; +} // end anonymous namespace + + +// FIXME: This should be in a separate file. +namespace { + class DarwinPPCAsmBackend : public PPCAsmBackend { + public: + DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + assert(0 && "UNIMP"); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + bool is64 = getPointerSize() == 8; + return createMachObjectWriter(new PPCMachObjectWriter( + /*Is64Bit=*/is64, + (is64 ? object::mach::CTM_PowerPC64 : + object::mach::CTM_PowerPC), + object::mach::CSPPC_ALL), + OS, /*IsLittleEndian=*/false); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } + }; + + class ELFPPCAsmBackend : public PPCAsmBackend { + Triple::OSType OSType; + public: + ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) : + PPCAsmBackend(T), OSType(OSType) { } + + /// ApplyFixup - OR the adjusted fixup value, most significant byte first, + /// into the bytes of Data that start at the fixup's offset. + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + + // FK_Data_1 and FK_Data_2 patch 1- and 2-byte fields; every other kind + // accepted by adjustFixupValue patches a 4-byte field. Always writing 4 + // bytes would put small data values in the wrong bytes and could run past + // the end of the field. + unsigned NumBytes = 4; + if (Fixup.getKind() == FK_Data_1) + NumBytes = 1; + else if (Fixup.getKind() == FK_Data_2) + NumBytes = 2; + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the bits from + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + bool is64 = getPointerSize() == 8; + return createELFObjectWriter(new PPCELFObjectWriter( + /*Is64Bit=*/is64, + OSType, + is64 ? 
ELF::EM_PPC64 : ELF::EM_PPC, + /*addend*/ true, /*isLittleEndian*/ false), + OS, /*IsLittleEndian=*/false); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } + }; + +} // end anonymous namespace + + + + +MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) { + if (Triple(TT).isOSDarwin()) + return new DarwinPPCAsmBackend(T); + + return new ELFPPCAsmBackend(T, Triple(TT).getOS()); +} diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h new file mode 100644 index 0000000..369bbdc --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h @@ -0,0 +1,70 @@ +//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the PPC target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCBASEINFO_H +#define PPCBASEINFO_H + +#include "PPCMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// getPPCRegisterNumbering - Given the enum value for some register, e.g. +/// PPC::F14, return the number that it corresponds to (e.g. 14). 
+inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) { + using namespace PPC; + switch (RegEnum) { + case 0: return 0; + case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; + case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; + case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; + case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; + case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; + case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; + case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; + case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; + case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; + case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; + case R10: case X10: case F10: case V10: case CR2EQ: return 10; + case R11: case X11: case F11: case V11: case CR2UN: return 11; + case R12: case X12: case F12: case V12: case CR3LT: return 12; + case R13: case X13: case F13: case V13: case CR3GT: return 13; + case R14: case X14: case F14: case V14: case CR3EQ: return 14; + case R15: case X15: case F15: case V15: case CR3UN: return 15; + case R16: case X16: case F16: case V16: case CR4LT: return 16; + case R17: case X17: case F17: case V17: case CR4GT: return 17; + case R18: case X18: case F18: case V18: case CR4EQ: return 18; + case R19: case X19: case F19: case V19: case CR4UN: return 19; + case R20: case X20: case F20: case V20: case CR5LT: return 20; + case R21: case X21: case F21: case V21: case CR5GT: return 21; + case R22: case X22: case F22: case V22: case CR5EQ: return 22; + case R23: case X23: case F23: case V23: case CR5UN: return 23; + case R24: case X24: case F24: case V24: case CR6LT: return 24; + case R25: case X25: case F25: case V25: case CR6GT: return 25; + case R26: case X26: case F26: case V26: case CR6EQ: return 26; + case R27: case X27: case F27: case V27: 
case CR6UN: return 27; + case R28: case X28: case F28: case V28: case CR7LT: return 28; + case R29: case X29: case F29: case V29: case CR7GT: return 29; + case R30: case X30: case F30: case V30: case CR7EQ: return 30; + case R31: case X31: case F31: case V31: case CR7UN: return 31; + default: + llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); + } +} + +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h new file mode 100644 index 0000000..b3c889e --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -0,0 +1,45 @@ +//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PPC_PPCFIXUPKINDS_H +#define LLVM_PPC_PPCFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace PPC { +enum Fixups { + // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b' + // and 'bl'. + fixup_ppc_br24 = FirstTargetFixupKind, + + /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional + /// branches. + fixup_ppc_brcond14, + + /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs + /// like 'li'. + fixup_ppc_lo16, + + /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs + /// like 'lis'. + fixup_ppc_ha16, + + /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs + /// like 'std'. 
+ fixup_ppc_lo14, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} +} + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp new file mode 100644 index 0000000..e9424d8 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -0,0 +1,66 @@ +//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MCAsmInfoDarwin properties. +// +//===----------------------------------------------------------------------===// + +#include "PPCMCAsmInfo.h" +using namespace llvm; + +PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { + if (is64Bit) + PointerSize = 8; + IsLittleEndian = false; + + PCSymbol = "."; + CommentString = ";"; + ExceptionsType = ExceptionHandling::DwarfCFI; + + if (!is64Bit) + Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. + + AssemblerDialect = 1; // New-Style mnemonics. + SupportsDebugInformation= true; // Debug information. +} + +PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { + if (is64Bit) + PointerSize = 8; + IsLittleEndian = false; + + // ".comm align is in bytes but .align is pow-2." 
+ AlignmentIsInBytes = false; + + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + + // Uses '.section' before '.bss' directive + UsesELFSectionDirectiveForBSS = true; + + // Debug Information + SupportsDebugInformation = true; + + PCSymbol = "."; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + + // Exceptions handling + if (!is64Bit) + ExceptionsType = ExceptionHandling::DwarfCFI; + + ZeroDirective = "\t.space\t"; + Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; + LCOMMDirectiveType = LCOMM::NoAlignment; + AssemblerDialect = 0; // Old-Style mnemonics. +} + diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h new file mode 100644 index 0000000..96ae6fb --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -0,0 +1,31 @@ +//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCAsmInfoDarwin class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PPCTARGETASMINFO_H +#define PPCTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfoDarwin.h" + +namespace llvm { + + struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + explicit PPCMCAsmInfoDarwin(bool is64Bit); + }; + + struct PPCLinuxMCAsmInfo : public MCAsmInfo { + explicit PPCLinuxMCAsmInfo(bool is64Bit); + }; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp new file mode 100644 index 0000000..262f97c3 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -0,0 +1,193 @@ +//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPCMCCodeEmitter class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCInst.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); + +namespace { +class PPCMCCodeEmitter : public MCCodeEmitter { + PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT + +public: + PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) { + } + + ~PPCMCCodeEmitter() {} + + unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. 
+ unsigned getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned Bits = getBinaryCodeForInstr(MI, Fixups); + + // Output the constant in big endian byte order. + for (unsigned i = 0; i != 4; ++i) { + OS << (char)(Bits >> 24); + Bits <<= 8; + } + + ++MCNumEmitted; // Keep track of the # of mi's emitted. + } + +}; + +} // end anonymous namespace + +MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new PPCMCCodeEmitter(MCII, STI, Ctx); +} + +unsigned PPCMCCodeEmitter:: +getDirectBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the branch target. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_br24)); + return 0; +} + +unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the branch target. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_brcond14)); + return 0; +} + +unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the ha16 part of the symbolic operand. 
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_ha16)); + return 0; +} + +unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the lo16 part of the symbolic operand. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); + return 0; +} + +unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + // Encode (imm, reg) as a memri, which has the low 16-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16; + + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; + + // Add a fixup for the displacement field. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); + return RegBits; +} + + +unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + // Encode (imm, reg) as a memrix, which has the low 14-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14; + + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; + + // Add a fixup for the displacement field. 
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo14)); + return RegBits; +} + + +unsigned PPCMCCodeEmitter:: +get_crbitm_encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); + return 0x80 >> getPPCRegisterNumbering(MO.getReg()); +} + + +unsigned PPCMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. + // The GPR operand should come through here though. + assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || + MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); + return getPPCRegisterNumbering(MO.getReg()); + } + + assert(MO.isImm() && + "Relocation required in an instruction that we cannot encode!"); + return MO.getImm(); +} + + +#include "PPCGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp new file mode 100644 index 0000000..d5c8a9e --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -0,0 +1,151 @@ +//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PowerPC specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "PPCMCTargetDesc.h" +#include "PPCMCAsmInfo.h" +#include "InstPrinter/PPCInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "PPCGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "PPCGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "PPCGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createPPCMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitPPCMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { + Triple TheTriple(TT); + bool isPPC64 = (TheTriple.getArch() == Triple::ppc64); + unsigned Flavour = isPPC64 ? 0 : 1; + unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR; + + MCRegisterInfo *X = new MCRegisterInfo(); + InitPPCMCRegisterInfo(X, RA, Flavour, Flavour); + return X; +} + +static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitPPCMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + bool isPPC64 = TheTriple.getArch() == Triple::ppc64; + + MCAsmInfo *MAI; + if (TheTriple.isOSDarwin()) + MAI = new PPCMCAsmInfoDarwin(isPPC64); + else + MAI = new PPCLinuxMCAsmInfo(isPPC64); + + // Initial state of the frame pointer is R1. 
+ MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(PPC::R1, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + if (RM == Reloc::Default) { + Triple T(TT); + if (T.isOSDarwin()) + RM = Reloc::DynamicNoPIC; + else + RM = Reloc::Static; + } + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +// This is duplicated code. Refactor this. +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + if (Triple(TT).isOSDarwin()) + return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createPPCMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + return new PPCInstPrinter(MAI, SyntaxVariant); +} + +extern "C" void LLVMInitializePowerPCTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo); + RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target, + createPPCMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target, + createPPCMCSubtargetInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend); + TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); +} diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h new file mode 100644 index 0000000..e5bf2a9 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -0,0 +1,51 @@ +//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PowerPC specific target descriptions. 
//
//===----------------------------------------------------------------------===//

#ifndef PPCMCTARGETDESC_H
#define PPCMCTARGETDESC_H

namespace llvm {
// Forward declarations only; this header includes no MC headers itself.
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCSubtargetInfo;
class Target;
class StringRef;

// The two PowerPC Target objects (32-bit and 64-bit); defined elsewhere.
extern Target ThePPC32Target;
extern Target ThePPC64Target;

/// createPPCMCCodeEmitter - Factory for the PowerPC MCCodeEmitter.
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
                                      const MCSubtargetInfo &STI,
                                      MCContext &Ctx);

/// createPPCAsmBackend - Factory for the PowerPC MCAsmBackend for triple TT.
MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT);

} // End llvm namespace

// Defines symbolic names for PowerPC registers.  This defines a mapping from
// register name to register number.
//
#define GET_REGINFO_ENUM
#include "PPCGenRegisterInfo.inc"

// Defines symbolic names for the PowerPC instructions.
//
#define GET_INSTRINFO_ENUM
#include "PPCGenInstrInfo.inc"

// Subtarget enum section of the generated subtarget info.
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"

#endif

// diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
// new file mode 100644
// index 0000000..12bb0a1
// --- /dev/null
// +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
// @@ -0,0 +1,31 @@
//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PowerPC branch predicates.
//
//===----------------------------------------------------------------------===//

#include "PPCPredicates.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
using namespace llvm;

/// InvertPredicate - Return the logical negation of a branch predicate
/// (EQ<->NE, LT<->GE, GT<->LE, NU<->UN). Any other value, including
/// PRED_ALWAYS, has no case below and reaches llvm_unreachable.
PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unknown PPC branch opcode!");
  case PPC::PRED_EQ: return PPC::PRED_NE;
  case PPC::PRED_NE: return PPC::PRED_EQ;
  case PPC::PRED_LT: return PPC::PRED_GE;
  case PPC::PRED_GE: return PPC::PRED_LT;
  case PPC::PRED_GT: return PPC::PRED_LE;
  case PPC::PRED_LE: return PPC::PRED_GT;
  case PPC::PRED_NU: return PPC::PRED_UN;
  case PPC::PRED_UN: return PPC::PRED_NU;
  }
}

// diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
// new file mode 100644
// index 0000000..f872e86
// --- /dev/null
// +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
// @@ -0,0 +1,37 @@
//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PowerPC branch predicates.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
#define LLVM_TARGET_POWERPC_PPCPREDICATES_H

namespace llvm {
namespace PPC {
  /// Predicate - These are "(BI << 5) | BO" for various predicates.
  /// Each predicate and its inverse share the same BI value and differ only
  /// in BO (12 vs. 4); PRED_ALWAYS uses BO = 20.
  enum Predicate {
    PRED_ALWAYS = (0 << 5) | 20,
    PRED_LT     = (0 << 5) | 12,
    PRED_LE     = (1 << 5) |  4,
    PRED_EQ     = (2 << 5) | 12,
    PRED_GE     = (0 << 5) |  4,
    PRED_GT     = (1 << 5) | 12,
    PRED_NE     = (2 << 5) |  4,
    PRED_UN     = (3 << 5) | 12,
    PRED_NU     = (3 << 5) |  4
  };

  /// Invert the specified predicate.  != -> ==, < -> >=.
  Predicate InvertPredicate(Predicate Opcode);
}
}

#endif

// diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
// new file mode 100644
// index 0000000..5dc1863
// --- /dev/null
// +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
// @@ -0,0 +1,76 @@
//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the LLVM
// PowerPC back-end.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H

#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>

// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC

namespace llvm {
  class PPCTargetMachine;
  class FunctionPass;
  class formatted_raw_ostream;
  class JITCodeEmitter;
  class Target;
  class MachineInstr;
  class AsmPrinter;
  class MCInst;
  class TargetMachine;

  // Factories for the PowerPC-specific passes, plus the MachineInstr ->
  // MCInst lowering entry point used by the asm printer.
  FunctionPass *createPPCBranchSelectionPass();
  FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
  FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
                                            JITCodeEmitter &MCE);
  void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                    AsmPrinter &AP, bool isDarwin);

  namespace PPCII {

  /// Target Operand Flag enum.
  enum TOF {
    //===------------------------------------------------------------------===//
    // PPC Specific MachineOperand flags.
    MO_NO_FLAG,

    /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
    /// reference is actually to the "FOO$stub" symbol.  This is used for calls
    /// and jumps to external functions on Tiger and earlier.
    MO_DARWIN_STUB = 1,

    /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
    MO_LO16 = 4, MO_HA16 = 8,

    /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
    /// the function's picbase, e.g. lo16(symbol-picbase).
    MO_PIC_FLAG = 16,

    /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
    /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
    MO_NLP_FLAG = 32,

    /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
    /// symbol with hidden visibility.  This causes a different kind of
    /// non-lazy-pointer to be generated.
    MO_NLP_HIDDEN_FLAG = 64
  };
  } // end namespace PPCII

} // end namespace llvm;

#endif

// diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
// new file mode 100644
// index 0000000..2d5d302
// --- /dev/null
// +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
// @@ -0,0 +1,112 @@
//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is the top level entry point for the PowerPC target.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing.
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//

//===----------------------------------------------------------------------===//
// CPU Directives                                                             //
//===----------------------------------------------------------------------===//

// Each DirectiveXXX feature sets the subtarget's DarwinDirective field to the
// named PPC::DIR_* value. Note that Directive604 and Directive620 both map to
// PPC::DIR_603 rather than to values of their own.
def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64  : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;

// User-visible features: each one sets the named boolean subtarget field to
// true when enabled.
def Feature64Bit     : SubtargetFeature<"64bit","Has64BitSupport", "true",
                                        "Enable 64-bit instructions">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
                              "Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureAltivec   : SubtargetFeature<"altivec","HasAltivec", "true",
                                        "Enable Altivec instructions">;
def FeatureGPUL      : SubtargetFeature<"gpul","IsGigaProcessor", "true",
                                        "Enable GPUL instructions">;
def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
                                        "Enable the fsqrt instruction">;
def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
                                        "Enable the stfiwx instruction">;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "PPCRegisterInfo.td"
include "PPCSchedule.td"
include "PPCInstrInfo.td"

//===----------------------------------------------------------------------===//
// PowerPC processors supported.
//

// Each entry binds a CPU name to a scheduling itinerary and a feature list.
// The DirectiveXXX feature in the list selects the subtarget's
// DarwinDirective value; the remaining features enable optional instruction
// groups.
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603]>;
def : Processor<"603e", G3Itineraries, [Directive603]>;
def : Processor<"603ev", G3Itineraries, [Directive603]>;
def : Processor<"604", G3Itineraries, [Directive604]>;
def : Processor<"604e", G3Itineraries, [Directive604]>;
def : Processor<"620", G3Itineraries, [Directive620]>;
// The G3 is the 750 family, so it takes the 750 directive (it was previously
// tagged with the 7400/G4 directive).
def : Processor<"g3", G3Itineraries, [Directive750]>;
def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
// The G4+ is the 7450 family, so it takes the same directive as "7450" (it
// was previously tagged with the 750/G3 directive).
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
// The 750 has no AltiVec unit, so the feature must not be enabled for it.
// NOTE(review): the G4 itinerary is left as it was; G3Itineraries may be the
// better match - confirm against the scheduling models.
def : Processor<"750", G4Itineraries, [Directive750]>;
def : Processor<"970", G5Itineraries,
                  [Directive970, FeatureAltivec,
                   FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
                   Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"g5", G5Itineraries,
                  [Directive970, FeatureAltivec,
                   FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
                   Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
                  [Directive64, FeatureAltivec,
                   FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
                   Feature64Bit /*, Feature64BitRegs */]>;


//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//

include "PPCCallingConv.td"

// Instruction-set description record referenced by the target below.
def PPCInstrInfo : InstrInfo {
  let isLittleEndianEncoding = 1;
}

// Assembly writer record: names the generated writer class and marks it as
// an MC-based writer.
def PPCAsmWriter : AsmWriter {
  string AsmWriterClassName  = "InstPrinter";
  bit isMCAsmWriter = 1;
}

// Top-level target record tying the instruction set and asm writer together.
def PPC : Target {
  // Information about the instructions.
  let InstructionSet = PPCInstrInfo;

  let AssemblyWriters = [PPCAsmWriter];
}

// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
// new file mode 100644
// index 0000000..9528459
// --- /dev/null
// +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
// @@ -0,0 +1,686 @@
//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to PowerPC assembly language. This printer is
// the output mechanism used by `llc'.
//
// Documentation at http://developer.apple.com/documentation/DeveloperTools/
// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/SmallString.h"
#include "InstPrinter/PPCInstPrinter.h"
using namespace llvm;

namespace {
  /// PPCAsmPrinter - Common base for the PowerPC assembly printers. Besides
  /// the usual AsmPrinter hooks it keeps the TOC map (symbol -> label of its
  /// TOC entry) that EmitInstruction fills in while lowering LDtoc.
  class PPCAsmPrinter : public AsmPrinter {
  protected:
    DenseMap<MCSymbol*, MCSymbol*> TOC;   // Symbol -> TOC entry label.
    const PPCSubtarget &Subtarget;
    uint64_t TOCLabelID;                  // Next id for a temp TOC label.
  public:
    explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
      : AsmPrinter(TM, Streamer),
        Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}

    virtual const char *getPassName() const {
      return "PowerPC Assembly Printer";
    }


    virtual void EmitInstruction(const MachineInstr *MI);

    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);

    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                         unsigned AsmVariant, const char *ExtraCode,
                         raw_ostream &O);
    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                               unsigned AsmVariant, const char *ExtraCode,
                               raw_ostream &O);

    /// getDebugValueLocation - Build the location described by a DBG_VALUE
    /// with four operands: operand 0 as the register and operand 2 as the
    /// offset. Any other operand shape is only logged, and the
    /// default-constructed location is returned.
    MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
      MachineLocation Location;
      assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
      // Frame address.  Currently handles register +- offset only.
      if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
        Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
      else {
        DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
      }
      return Location;
    }
  };

  /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
  class PPCLinuxAsmPrinter : public PPCAsmPrinter {
  public:
    explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
      : PPCAsmPrinter(TM, Streamer) {}

    virtual const char *getPassName() const {
      return "Linux PPC Assembly Printer";
    }

    bool doFinalization(Module &M);

    virtual void EmitFunctionEntryLabel();
  };

  /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
  /// OS X
  class PPCDarwinAsmPrinter : public PPCAsmPrinter {
  public:
    explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
      : PPCAsmPrinter(TM, Streamer) {}

    virtual const char *getPassName() const {
      return "Darwin PPC Assembly Printer";
    }

    bool doFinalization(Module &M);
    void EmitStartOfAsmFile(Module &M);

    void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
  };
} // end of anonymous namespace

/// stripRegisterPrefix - This method strips the character prefix from a
/// register name so that only the number is left.  Used for Linux asm.
/// A leading 'r', 'f' or 'v' is dropped, as is a leading "cr"; any other
/// name is returned unchanged.
static const char *stripRegisterPrefix(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'v': return RegName + 1;
  case 'c': if (RegName[1] == 'r') return RegName + 2;
  }

  return RegName;
}

/// printOperand - Print operand OpNo of MI to O in textual form, dispatching
/// on the operand type. Unknown operand types are printed as a diagnostic
/// placeholder rather than rejected.
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
                                 raw_ostream &O) {
  const MachineOperand &MO = MI->getOperand(OpNo);

  switch (MO.getType()) {
  case MachineOperand::MO_Register: {
    const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
    // Linux assembler (Others?) does not take register mnemonics.
    // FIXME - What about special registers used in mfspr/mtspr?
    if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
    O << RegName;
    return;
  }
  case MachineOperand::MO_Immediate:
    O << MO.getImm();
    return;

  case MachineOperand::MO_MachineBasicBlock:
    O << *MO.getMBB()->getSymbol();
    return;
  case MachineOperand::MO_JumpTableIndex:
    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
      << '_' << MO.getIndex();
    // FIXME: PIC relocation model
    return;
  case MachineOperand::MO_ConstantPoolIndex:
    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
      << '_' << MO.getIndex();
    return;
  case MachineOperand::MO_BlockAddress:
    O << *GetBlockAddressSymbol(MO.getBlockAddress());
    return;
  case MachineOperand::MO_ExternalSymbol: {
    // Computing the address of an external symbol, not calling it.
    if (TM.getRelocationModel() == Reloc::Static) {
      O << *GetExternalSymbolSymbol(MO.getSymbolName());
      return;
    }

    // Non-static: refer to the symbol through its $non_lazy_ptr stub and
    // make sure a stub entry is registered for it.
    MCSymbol *NLPSym =
      OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
                                   MO.getSymbolName()+"$non_lazy_ptr");
    MachineModuleInfoImpl::StubValueTy &StubSym =
      MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
    if (StubSym.getPointer() == 0)
      StubSym = MachineModuleInfoImpl::
        StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);

    O << *NLPSym;
    return;
  }
  case MachineOperand::MO_GlobalAddress: {
    // Computing the address of a global symbol, not calling it.
    const GlobalValue *GV = MO.getGlobal();
    MCSymbol *SymToPrint;

    // External or weakly linked global variables need non-lazily-resolved stubs
    if (TM.getRelocationModel() != Reloc::Static &&
        (GV->isDeclaration() || GV->isWeakForLinker())) {
      if (!GV->hasHiddenVisibility()) {
        SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
        MachineModuleInfoImpl::StubValueTy &StubSym =
          MMI->getObjFileInfo<MachineModuleInfoMachO>()
            .getGVStubEntry(SymToPrint);
        if (StubSym.getPointer() == 0)
          StubSym = MachineModuleInfoImpl::
            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
      } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
                 GV->hasAvailableExternallyLinkage()) {
        // Hidden visibility: same stub name, but tracked in the separate
        // hidden-stub table.
        SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");

        MachineModuleInfoImpl::StubValueTy &StubSym =
          MMI->getObjFileInfo<MachineModuleInfoMachO>().
                    getHiddenGVStubEntry(SymToPrint);
        if (StubSym.getPointer() == 0)
          StubSym = MachineModuleInfoImpl::
            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
      } else {
        SymToPrint = Mang->getSymbol(GV);
      }
    } else {
      SymToPrint = Mang->getSymbol(GV);
    }

    O << *SymToPrint;

    printOffset(MO.getOffset(), O);
    return;
  }

  default:
    O << "<unknown operand type: " << MO.getType() << ">";
    return;
  }
}

/// PrintAsmOperand - Print out an operand for an inline asm expression.
/// Returns true on an unknown or inapplicable modifier and false once the
/// operand has been handled.
///
bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                    unsigned AsmVariant,
                                    const char *ExtraCode, raw_ostream &O) {
  // Does this asm operand have a single letter operand modifier?
  if (ExtraCode && ExtraCode[0]) {
    if (ExtraCode[1] != 0) return true; // Unknown modifier.

    switch (ExtraCode[0]) {
    default: return true;  // Unknown modifier.
    case 'c': // Don't print "$" before a global var name or constant.
      break; // PPC never has a prefix.
    case 'L': // Write second word of DImode reference.
      // Verify that this operand has two consecutive registers.
      if (!MI->getOperand(OpNo).isReg() ||
          OpNo+1 == MI->getNumOperands() ||
          !MI->getOperand(OpNo+1).isReg())
        return true;
      ++OpNo;   // Return the high-part.
      break;
    case 'I':
      // Write 'i' if an integer constant, otherwise nothing.  Used to print
      // addi vs add, etc.
      if (MI->getOperand(OpNo).isImm())
        O << "i";
      return false;
    }
  }

  printOperand(MI, OpNo, O);
  return false;
}

// At the moment, all inline asm memory operands are a single register.
// In any case, the output of this routine should always be just one
// assembler operand.

/// PrintAsmMemoryOperand - Print an inline asm memory operand as
/// "0(<reg>)". Any modifier is rejected by returning true.
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                                          unsigned AsmVariant,
                                          const char *ExtraCode,
                                          raw_ostream &O) {
  if (ExtraCode && ExtraCode[0])
    return true; // Unknown modifier.
  assert(MI->getOperand(OpNo).isReg());
  O << "0(";
  printOperand(MI, OpNo, O);
  O << ")";
  return false;
}


/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;

  // Lower multi-instruction pseudo operations.
  switch (MI->getOpcode()) {
  default: break;
  case TargetOpcode::DBG_VALUE: {
    // Debug values are only rendered as a comment, and only for verbose
    // textual output.
    if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;

    SmallString<32> Str;
    raw_svector_ostream O(Str);
    unsigned NOps = MI->getNumOperands();
    assert(NOps==4);
    O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
    // cast away const; DIetc do not take const operands for some reason.
    DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
    O << V.getName();
    O << " <- ";
    // Frame address.  Currently handles register +- offset only.
    assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
    O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
    O << ']';
    O << "+";
    printOperand(MI, NOps-2, O);
    OutStreamer.EmitRawText(O.str());
    return;
  }

  case PPC::MovePCtoLR:
  case PPC::MovePCtoLR8: {
    // Transform %LR = MovePCtoLR
    // Into this, where the label is the PIC base:
    //     bl L1$pb
    // L1$pb:
    MCSymbol *PICBase = MF->getPICBaseSymbol();

    // Emit the 'bl'.
    TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.


    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
                                             Create(PICBase, OutContext)));
    OutStreamer.EmitInstruction(TmpInst);

    // Emit the label.
    OutStreamer.EmitLabel(PICBase);
    return;
  }
  case PPC::LDtoc: {
    // Transform %X3 = LDtoc <ga:@min1>, %X2
    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());

    // Change the opcode to LD, and the global address operand to be a
    // reference to the TOC entry we will synthesize later.
    TmpInst.setOpcode(PPC::LD);
    const MachineOperand &MO = MI->getOperand(1);
    assert(MO.isGlobal());

    // Map symbol -> label of TOC entry.
    MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
    if (TOCEntry == 0)
      TOCEntry = GetTempSymbol("C", TOCLabelID++);

    const MCExpr *Exp =
      MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
                              OutContext);
    TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
    OutStreamer.EmitInstruction(TmpInst);
    return;
  }

  case PPC::MFCRpseud:
    // Transform: %R3 = MFCRpseud %CR7
    // Into:      %R3 = MFCR      ;; cr7
    OutStreamer.AddComment(PPCInstPrinter::
                           getRegisterName(MI->getOperand(1).getReg()));
    TmpInst.setOpcode(PPC::MFCR);
    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
    OutStreamer.EmitInstruction(TmpInst);
    return;
  }

  // Everything else is lowered one-to-one.
  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
  OutStreamer.EmitInstruction(TmpInst);
}

/// EmitFunctionEntryLabel - On 32-bit targets defer to the generic
/// implementation. On 64-bit targets emit a procedure descriptor for the
/// function in the ".opd" section, followed by the ".L.<name>" label that
/// the descriptor points at.
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
  if (!Subtarget.isPPC64())  // linux/ppc32 - Normal entry label.
    return AsmPrinter::EmitFunctionEntryLabel();

  // Emit an official procedure descriptor.
  // FIXME 64-bit SVR4: Use MCSection here!
  OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
  OutStreamer.EmitRawText(StringRef("\t.align 3"));
  OutStreamer.EmitLabel(CurrentFnSym);
  OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
                          ",.TOC.@tocbase");
  OutStreamer.EmitRawText(StringRef("\t.previous"));
  OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
}


/// doFinalization - On 64-bit targets with a non-empty TOC map, emit the
/// ".toc" section with one labelled ".tc" entry per recorded symbol, then
/// run the generic finalization.
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
  const TargetData *TD = TM.getTargetData();

  bool isPPC64 = TD->getPointerSizeInBits() == 64;

  if (isPPC64 && !TOC.empty()) {
    // FIXME 64-bit SVR4: Use MCSection here?
    OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));

    // FIXME: This is nondeterministic!
    for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
         E = TOC.end(); I != E; ++I) {
      OutStreamer.EmitLabel(I->second);
      OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
                              "[TC]," + I->first->getName());
    }
  }

  return AsmPrinter::doFinalization(M);
}

/// EmitStartOfAsmFile - Emit the ".machine" directive matching the subtarget
/// and prime the text/stub sections so they end up adjacent.
void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
  // Indexed by the (possibly raised) directive value computed below; the
  // assert guards the upper bound.
  static const char *const CPUDirectives[] = {
    "",
    "ppc",
    "ppc601",
    "ppc602",
    "ppc603",
    "ppc7400",
    "ppc750",
    "ppc970",
    "ppc64"
  };

  // Raise the directive when the enabled features require a newer CPU than
  // the one the subtarget reports.
  unsigned Directive = Subtarget.getDarwinDirective();
  if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
    Directive = PPC::DIR_970;
  if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
    Directive = PPC::DIR_7400;
  if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
    Directive = PPC::DIR_64;
  assert(Directive <= PPC::DIR_64 && "Directive out of range.");

  // FIXME: This is a total hack, finish mc'izing the PPC backend.
  if (OutStreamer.hasRawTextSupport())
    OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));

  // Prime text sections so they are adjacent.  This reduces the likelihood a
  // large data or debug section causes a branch to exceed 16M limit.
  const TargetLoweringObjectFileMachO &TLOFMacho =
    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
  OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
  if (TM.getRelocationModel() == Reloc::PIC_) {
    OutStreamer.SwitchSection(
           OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
                                      MCSectionMachO::S_SYMBOL_STUBS |
                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                                      32, SectionKind::getText()));
  } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
    OutStreamer.SwitchSection(
           OutContext.getMachOSection("__TEXT","__symbol_stub1",
                                      MCSectionMachO::S_SYMBOL_STUBS |
                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                                      16, SectionKind::getText()));
  }
  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
}

/// GetLazyPtr - Return the "$lazy_ptr" companion of a stub symbol: the last
/// five characters of the name are dropped unconditionally and "$lazy_ptr"
/// is appended. NOTE(review): this assumes every name passed in ends in
/// "$stub" and is at least five characters long - nothing here checks it.
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
  // Remove $stub suffix, add $lazy_ptr.
  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
  TmpStr += "$lazy_ptr";
  return Ctx.GetOrCreateSymbol(TmpStr.str());
}

/// GetAnonSym - Return the symbol named like Sym with "$tmp" appended.
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
  // Add $tmp suffix to $stub, yielding $stub$tmp.
  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
  TmpStr += "$tmp";
  return Ctx.GetOrCreateSymbol(TmpStr.str());
}

void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
  bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64;

  const TargetLoweringObjectFileMachO &TLOFMacho =
    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());

  // .lazy_symbol_pointer
  const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();

  // Output stubs for dynamically-linked functions
  if (TM.getRelocationModel() == Reloc::PIC_) {
    const MCSection *StubSection =
    OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
                               MCSectionMachO::S_SYMBOL_STUBS |
                               MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
                               32, SectionKind::getText());
    for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
      OutStreamer.SwitchSection(StubSection);
      EmitAlignment(4);

      MCSymbol *Stub = Stubs[i].first;
      MCSymbol *RawSym = Stubs[i].second.getPointer();
      MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
      MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);

      OutStreamer.EmitLabel(Stub);
      OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
      // FIXME: MCize this.
+ OutStreamer.EmitRawText(StringRef("\tmflr r0")); + OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName())); + OutStreamer.EmitLabel(AnonSymbol); + OutStreamer.EmitRawText(StringRef("\tmflr r11")); + OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+ + "-" + AnonSymbol->getName() + ")"); + OutStreamer.EmitRawText(StringRef("\tmtlr r0")); + + if (isPPC64) + OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + + "-" + AnonSymbol->getName() + ")(r11)"); + else + OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + + "-" + AnonSymbol->getName() + ")(r11)"); + OutStreamer.EmitRawText(StringRef("\tmtctr r12")); + OutStreamer.EmitRawText(StringRef("\tbctr")); + + OutStreamer.SwitchSection(LSPSection); + OutStreamer.EmitLabel(LazyPtr); + OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + if (isPPC64) + OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); + else + OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + } + OutStreamer.AddBlankLine(); + return; + } + + const MCSection *StubSection = + OutContext.getMachOSection("__TEXT","__symbol_stub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 16, SectionKind::getText()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + MCSymbol *Stub = Stubs[i].first; + MCSymbol *RawSym = Stubs[i].second.getPointer(); + MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); + + OutStreamer.SwitchSection(StubSection); + EmitAlignment(4); + OutStreamer.EmitLabel(Stub); + OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")"); + if (isPPC64) + OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + + ")(r11)"); + else + OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + + ")(r11)"); + OutStreamer.EmitRawText(StringRef("\tmtctr r12")); + 
OutStreamer.EmitRawText(StringRef("\tbctr")); + OutStreamer.SwitchSection(LSPSection); + OutStreamer.EmitLabel(LazyPtr); + OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + if (isPPC64) + OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); + else + OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + } + + OutStreamer.AddBlankLine(); +} + + +bool PPCDarwinAsmPrinter::doFinalization(Module &M) { + bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; + + // Darwin/PPC always uses mach-o. + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList(); + if (!Stubs.empty()) + EmitFunctionStubs(Stubs); + + if (MAI->doesSupportExceptionHandling() && MMI) { + // Add the (possibly multiple) personalities to the set of global values. + // Only referenced functions get into the Personalities list. + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (std::vector<const Function*>::const_iterator I = Personalities.begin(), + E = Personalities.end(); I != E; ++I) { + if (*I) { + MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMIMacho.getGVStubEntry(NLPSym); + StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true); + } + } + } + + // Output stubs for dynamically-linked functions. + Stubs = MMIMacho.GetGVStubList(); + + // Output macho stubs for external and common global variables. + if (!Stubs.empty()) { + // Switch with ".non_lazy_symbol_pointer" directive. + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + EmitAlignment(isPPC64 ? 
3 : 2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; + OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. + OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info pointers + // need to be indirect and pc-rel. We accomplish this by using NLPs. + // However, sometimes the types are local to the file. So we need to + // fill in the value for the NLP in those cases. + OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(isPPC64 ? 3 : 2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr:: + Create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never generates + // code that does this, it is always safe to set. 
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); + + return AsmPrinter::doFinalization(M); +} + +/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code +/// for a MachineFunction to the given output stream, in a format that the +/// Darwin assembler can deal with. +/// +static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, + MCStreamer &Streamer) { + const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>(); + + if (Subtarget->isDarwin()) + return new PPCDarwinAsmPrinter(tm, Streamer); + return new PPCLinuxAsmPrinter(tm, Streamer); +} + +// Force static initialization. +extern "C" void LLVMInitializePowerPCAsmPrinter() { + TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp new file mode 100644 index 0000000..475edf3 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -0,0 +1,174 @@ +//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that scans a machine function to determine which +// conditional branches need more than 16 bits of displacement to reach their +// target basic block. It does this in two passes; a calculation of basic block +// positions pass, and a branch pseudo op to machine branch opcode pass. This +// pass should be run last, just before the assembly printer. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ppc-branch-select" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" +#include "MCTargetDesc/PPCPredicates.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +STATISTIC(NumExpanded, "Number of branches expanded to long format"); + +namespace { + struct PPCBSel : public MachineFunctionPass { + static char ID; + PPCBSel() : MachineFunctionPass(ID) {} + + /// BlockSizes - The sizes of the basic blocks in the function. + std::vector<unsigned> BlockSizes; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "PowerPC Branch Selector"; + } + }; + char PPCBSel::ID = 0; +} + +/// createPPCBranchSelectionPass - returns an instance of the Branch Selection +/// Pass +/// +FunctionPass *llvm::createPPCBranchSelectionPass() { + return new PPCBSel(); +} + +bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { + const PPCInstrInfo *TII = + static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo()); + // Give the blocks of the function a dense, in-order, numbering. + Fn.RenumberBlocks(); + BlockSizes.resize(Fn.getNumBlockIDs()); + + // Measure each MBB and compute a size for the entire function. + unsigned FuncSize = 0; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock *MBB = MFI; + + unsigned BlockSize = 0; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); + MBBI != EE; ++MBBI) + BlockSize += TII->GetInstSizeInBytes(MBBI); + + BlockSizes[MBB->getNumber()] = BlockSize; + FuncSize += BlockSize; + } + + // If the entire function is smaller than the displacement of a branch field, + // we know we don't need to shrink any branches in this function. 
This is a + // common case. + if (FuncSize < (1 << 15)) { + BlockSizes.clear(); + return false; + } + + // For each conditional branch, if the offset to its destination is larger + // than the offset field allows, transform it into a long branch sequence + // like this: + // short branch: + // bCC MBB + // long branch: + // b!CC $PC+8 + // b MBB + // + bool MadeChange = true; + bool EverMadeChange = false; + while (MadeChange) { + // Iteratively expand branches until we reach a fixed point. + MadeChange = false; + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + unsigned MBBStartOffset = 0; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) { + MBBStartOffset += TII->GetInstSizeInBytes(I); + continue; + } + + // Determine the offset from the current branch to the destination + // block. + MachineBasicBlock *Dest = I->getOperand(2).getMBB(); + + int BranchSize; + if (Dest->getNumber() <= MBB.getNumber()) { + // If this is a backwards branch, the delta is the offset from the + // start of this block to this branch, plus the sizes of all blocks + // from this block to the dest. + BranchSize = MBBStartOffset; + + for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i) + BranchSize += BlockSizes[i]; + } else { + // Otherwise, add the size of the blocks between this block and the + // dest to the number of bytes left in this block. + BranchSize = -MBBStartOffset; + + for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i) + BranchSize += BlockSizes[i]; + } + + // If this branch is in range, ignore it. + if (isInt<16>(BranchSize)) { + MBBStartOffset += 4; + continue; + } + + // Otherwise, we have to expand it to a long branch. + // The BCC operands are: + // 0. PPC branch predicate + // 1. CR register + // 2. 
Target MBB + PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); + unsigned CRReg = I->getOperand(1).getReg(); + + MachineInstr *OldBranch = I; + DebugLoc dl = OldBranch->getDebugLoc(); + + // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. + BuildMI(MBB, I, dl, TII->get(PPC::BCC)) + .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); + + // Uncond branch to the real destination. + I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest); + + // Remove the old branch from the function. + OldBranch->eraseFromParent(); + + // Remember that this instruction is 8-bytes, increase the size of the + // block by 4, remember to iterate. + BlockSizes[MBB.getNumber()] += 4; + MBBStartOffset += 8; + ++NumExpanded; + MadeChange = true; + } + } + EverMadeChange |= MadeChange; + } + + BlockSizes.clear(); + /* Report a modification only when at least one branch was expanded; EverMadeChange accumulates MadeChange across all fixed-point iterations and was previously computed but never read. */ + return EverMadeChange; +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td new file mode 100644 index 0000000..441db94 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -0,0 +1,132 @@ +//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PowerPC 32- and 64-bit +// architectures. +// +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. 
+class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention +//===----------------------------------------------------------------------===// + +// Return-value convention for PowerPC +def RetCC_PPC : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + + CCIfType<[f32], CCAssignToReg<[F1]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2]>>, + + // Vector types are always returned in V2. + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> +]>; + + +//===----------------------------------------------------------------------===// +// PowerPC Argument Calling Conventions +//===----------------------------------------------------------------------===// +/* +def CC_PPC : CallingConv<[ + // The first 8 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, + + // Common sub-targets passes FP values in F1 - F13 + CCIfType<[f32, f64], + CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>, + + // The first 12 Vector arguments are passed in altivec registers. + CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>> + +/* + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. 
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 16>>*/ +]>; + +*/ + +//===----------------------------------------------------------------------===// +// PowerPC System V Release 4 ABI +//===----------------------------------------------------------------------===// + +def CC_PPC_SVR4_Common : CallingConv<[ + // The ABI requires i64 to be passed in two adjacent registers with the first + // register having an odd register number. + CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, + + // The first 8 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + + // Make sure the i64 words from a long double are either both passed in + // registers or both passed on the stack. + CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>, + + // FP values are passed in F1 - F8. + CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + + // Split arguments have an alignment of 8 bytes on the stack. + CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + + // Floats are stored in double precision format, thus they have the same + // alignment and size as doubles. + CCIfType<[f32,f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>> +]>; + +// This calling convention puts vector arguments always on the stack. It is used +// to assign vector arguments which belong to the variable portion of the +// parameter list of a variable argument function. +def CC_PPC_SVR4_VarArg : CallingConv<[ + CCDelegateTo<CC_PPC_SVR4_Common> +]>; + +// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put +// vector arguments in vector registers before putting them on the stack. +def CC_PPC_SVR4 : CallingConv<[ + // The first 12 Vector arguments are passed in AltiVec registers. 
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, + + CCDelegateTo<CC_PPC_SVR4_Common> +]>; + +// Helper "calling convention" to handle aggregate by value arguments. +// Aggregate by value arguments are always placed in the local variable space +// of the caller. This calling convention is only used to assign those stack +// offsets in the callers stack frame. +// +// Still, the address of the aggregate copy in the callers stack frame is passed +// in a GPR (or in the parameter list area if all GPRs are allocated) from the +// caller to the callee. The location for the address argument is assigned by +// the CC_PPC_SVR4 calling convention. +// +// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are +// not passed by value. + +def CC_PPC_SVR4_ByVal : CallingConv<[ + CCIfByVal<CCPassByVal<4, 4>>, + + CCCustom<"CC_PPC_SVR4_Custom_Dummy"> +]>; + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp new file mode 100644 index 0000000..4a1f182 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -0,0 +1,261 @@ +//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to +// JIT-compile bitcode to native PowerPC. 
+// +//===----------------------------------------------------------------------===// + +#include "PPCTargetMachine.h" +#include "PPCRelocations.h" +#include "PPC.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +namespace { + class PPCCodeEmitter : public MachineFunctionPass { + TargetMachine &TM; + JITCodeEmitter &MCE; + MachineModuleInfo *MMI; + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + + /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record + /// its address in the function into this pointer. + void *MovePCtoLROffset; + public: + + PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), TM(tm), MCE(mce) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. 
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + + + MachineRelocation GetRelocation(const MachineOperand &MO, + unsigned RelocID) const; + + /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + + unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; + + const char *getPassName() const { return "PowerPC Machine Code Emitter"; } + + /// runOnMachineFunction - emits the given MachineFunction to memory + /// + bool runOnMachineFunction(MachineFunction &MF); + + /// emitBasicBlock - emits the given MachineBasicBlock to memory + /// + void emitBasicBlock(MachineBasicBlock &MBB); + }; +} + +char PPCCodeEmitter::ID = 0; + +/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code +/// to the specified MCE object. 
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &JCE) { + return new PPCCodeEmitter(TM, JCE); +} + +bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + /* NOTE(review): as written this assertion can never fire. A relocation model cannot equal both Reloc::Default and Reloc::Static, so `m != Default || m != Static` is always true. The message suggests `==` was intended, but emitBasicBlock below asserts Reloc::PIC_ when it sees MovePCtoLR, so confirm which relocation models the JIT must accept before tightening this check. */ + assert((MF.getTarget().getRelocationModel() != Reloc::Default || + MF.getTarget().getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); + + MMI = &getAnalysis<MachineModuleInfo>(); + MCE.setModuleInfo(MMI); + do { + MovePCtoLROffset = 0; + MCE.startFunction(MF); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + emitBasicBlock(*BB); + } while (MCE.finishFunction(MF)); + + return false; +} + +void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { + MCE.StartMachineBasicBlock(&MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ + const MachineInstr &MI = *I; + MCE.processDebugLoc(MI.getDebugLoc(), true); + switch (MI.getOpcode()) { + default: + MCE.emitWordBE(getBinaryCodeForInstr(MI)); + break; + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getMCSymbol()); + break; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + break; // pseudo opcode, no side effects + case PPC::MovePCtoLR: + case PPC::MovePCtoLR8: + assert(TM.getRelocationModel() == Reloc::PIC_); + MovePCtoLROffset = (void*)MCE.getCurrentPCValue(); + MCE.emitWordBE(0x48000005); // bl 1 + break; + } + MCE.processDebugLoc(MI.getDebugLoc(), false); + } +} + +unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); + return 0x80 >> getPPCRegisterNumbering(MO.getReg()); +} + +MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, + unsigned RelocID) const { + // If in PIC mode, we need to 
encode the negated address of the + // 'movepctolr' into the unrelocated field. After relocation, we'll have + // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm + // field, we get &gv. This doesn't happen for branch relocations, which are + // always implicitly pc relative. + intptr_t Cst = 0; + if (TM.getRelocationModel() == Reloc::PIC_) { + assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); + Cst = -(intptr_t)MovePCtoLROffset - 4; + } + + if (MO.isGlobal()) + return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID, + const_cast<GlobalValue *>(MO.getGlobal()), + Cst, isa<Function>(MO.getGlobal())); + if (MO.isSymbol()) + return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + RelocID, MO.getSymbolName(), Cst); + if (MO.isCPI()) + return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); + + if (MO.isMBB()) + return MachineRelocation::getBB(MCE.getCurrentPCOffset(), + RelocID, MO.getMBB()); + + assert(MO.isJTI()); + return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); +} + +unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx)); + return 0; +} + +unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx)); + return 0; +} + +unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high)); + return 0; +} + +unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI, + 
unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); + return 0; +} + +unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memri, which has the low 16-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits; + + // Add a fixup for the displacement field. + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); + return RegBits; +} + +unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memrix, which has the low 14-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits; + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix)); + return RegBits; +} + + +unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + + if (MO.isReg()) { + // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. + // The GPR operand should come through here though. 
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || + MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); + return getPPCRegisterNumbering(MO.getReg()); + } + + assert(MO.isImm() && + "Relocation required in an instruction that we cannot encode!"); + return MO.getImm(); +} + +#include "PPCGenCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp new file mode 100644 index 0000000..7dead10 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -0,0 +1,971 @@ +//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PPC implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "PPCFrameLowering.h" +#include "PPCInstrInfo.h" +#include "PPCMachineFunctionInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +// FIXME This disables some code that aligns the stack to a boundary bigger than +// the default (16 bytes on Darwin) when there is a stack local of greater +// alignment. This does not currently work, because the delta between old and +// new stack pointers is added to offsets that reference incoming parameters +// after the prolog is generated, and the code that does that doesn't handle a +// variable delta. 
You don't want to do that anyway; a better approach is to +// reserve another register that retains to the incoming stack pointer, and +// reference parameters relative to that. +#define ALIGN_STACK 0 + + +/// VRRegNo - Map from a numbered VR register to its enum value. +/// +static const unsigned short VRRegNo[] = { + PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , + PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; + +/// RemoveVRSaveCode - We have found that this function does not need any code +/// to manipulate the VRSAVE register, even though it uses vector registers. +/// This can happen when the only registers used are known to be live in or out +/// of the function. Remove all of the VRSAVE related code from the function. +static void RemoveVRSaveCode(MachineInstr *MI) { + MachineBasicBlock *Entry = MI->getParent(); + MachineFunction *MF = Entry->getParent(); + + // We know that the MTVRSAVE instruction immediately follows MI. Remove it. + MachineBasicBlock::iterator MBBI = MI; + ++MBBI; + assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); + MBBI->eraseFromParent(); + + bool RemovedAllMTVRSAVEs = true; + // See if we can find and remove the MTVRSAVE instruction from all of the + // epilog blocks. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().getDesc().isReturn()) { + bool FoundIt = false; + for (MBBI = I->end(); MBBI != I->begin(); ) { + --MBBI; + if (MBBI->getOpcode() == PPC::MTVRSAVE) { + MBBI->eraseFromParent(); // remove it. 
+ FoundIt = true; + break; + } + } + RemovedAllMTVRSAVEs &= FoundIt; + } + } + + // If we found and removed all MTVRSAVE instructions, remove the read of + // VRSAVE as well. + if (RemovedAllMTVRSAVEs) { + MBBI = MI; + assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); + --MBBI; + assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); + MBBI->eraseFromParent(); + } + + // Finally, nuke the UPDATE_VRSAVE. + MI->eraseFromParent(); +} + +// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the +// instruction selector. Based on the vector registers that have been used, +// transform this into the appropriate ORI instruction. +static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { + MachineFunction *MF = MI->getParent()->getParent(); + DebugLoc dl = MI->getDebugLoc(); + + unsigned UsedRegMask = 0; + for (unsigned i = 0; i != 32; ++i) + if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + UsedRegMask |= 1 << (31-i); + + // Live in and live out values already must be in the mask, so don't bother + // marking them. + for (MachineRegisterInfo::livein_iterator + I = MF->getRegInfo().livein_begin(), + E = MF->getRegInfo().livein_end(); I != E; ++I) { + unsigned RegNo = getPPCRegisterNumbering(I->first); + if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. + UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + } + for (MachineRegisterInfo::liveout_iterator + I = MF->getRegInfo().liveout_begin(), + E = MF->getRegInfo().liveout_end(); I != E; ++I) { + unsigned RegNo = getPPCRegisterNumbering(*I); + if (VRRegNo[RegNo] == *I) // If this really is a vector reg. + UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + } + + // If no registers are used, turn this into a copy. + if (UsedRegMask == 0) { + // Remove all VRSAVE code. 
+ RemoveVRSaveCode(MI); + return; + } + + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + + if ((UsedRegMask & 0xFFFF) == UsedRegMask) { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask); + } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask >> 16); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask >> 16); + } else { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask >> 16); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask >> 16); + + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(DstReg, RegState::Kill) + .addImm(UsedRegMask & 0xFFFF); + } + + // Remove the old UPDATE_VRSAVE instruction. + MI->eraseFromParent(); +} + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target, and the maximum alignment + // (if any) of the fixed frame objects. 
+ unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned TargetAlign = getStackAlignment(); + unsigned AlignMask = TargetAlign - 1; // + + // If we are a leaf function, and use up to 224 bytes of stack space, + // don't have a frame pointer, calls, or dynamic alloca then we do not need + // to adjust the stack pointer (we fit in the Red Zone). + bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. + if (!DisableRedZone && + FrameSize <= 224 && // Fits in red zone. + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. + // No need for frame + MFI->setStackSize(0); + return; + } + + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // Maximum call frame needs to be at least big enough for linkage and 8 args. + unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(), + Subtarget.isDarwinABI()); + maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = (FrameSize + AlignMask) & ~AlignMask; + + // Update frame info. + MFI->setStackSize(FrameSize); +} + +// hasFP - Return true if the specified function actually has a dedicated frame +// pointer register. 
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // FIXME: This is pretty much broken by design: hasFP() might be called really
  // early, before the stack layout was calculated and thus hasFP() might return
  // true or false here depending on the time of call.
  // A function whose recorded stack size is zero never reports a frame
  // pointer, even when needsFP() is true.
  return (MFI->getStackSize()) && needsFP(MF);
}

// needsFP - Return true if the specified function should have a dedicated frame
// pointer register.  This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.  It is also true when
// GuaranteedTailCallOpt is set and the function contains a fast call, and it is
// always false for naked functions.
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction()->hasFnAttr(Attribute::Naked))
    return false;

  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
    (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}


/// emitPrologue - Insert the prolog at the start of the entry block of MF.
/// In order, this: rewrites any UPDATE_VRSAVE pseudo, computes the frame
/// layout, saves LR and the frame pointer register when required, allocates
/// the frame by updating the stack pointer, copies the stack pointer into the
/// frame pointer when there is one, and records frame moves when debug info
/// or an unwind table entry is needed.
void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII =
    *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());

  MachineModuleInfo &MMI = MF.getMMI();
  DebugLoc dl;
  bool needsFrameMoves = MMI.hasDebugInfo() ||
    MF.getFunction()->needsUnwindTableEntry();

  // Prepare for frame info.
  MCSymbol *FrameLabel = 0;

  // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
  // process it.
  // NOTE(review): the counter 'i' is incremented but never read.
  for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
    if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
      // HandleVRSaveUpdate erases the instruction, so stop iterating at once.
      HandleVRSaveUpdate(MBBI, TII);
      break;
    }
  }

  // Move MBBI back to the beginning of the function.
  MBBI = MBB.begin();

  // Work out frame sizes.
  // FIXME: determineFrameLayout() may change the frame size. This should be
  // moved upper, to some hook.
  determineFrameLayout(MF);
  unsigned FrameSize = MFI->getStackSize();

  // The stack pointer is moved by the negated frame size.
  int NegFrameSize = -FrameSize;

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get operating system
  bool isDarwinABI = Subtarget.isDarwinABI();
  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  // Do we have a frame pointer for this function?
  bool HasFP = hasFP(MF);

  int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);

  // Under the SVR4 ABI the frame pointer save slot is a frame object whose
  // offset is read from the frame info; otherwise it is a fixed ABI offset.
  int FPOffset = 0;
  if (HasFP) {
    if (Subtarget.isSVR4ABI()) {
      MachineFrameInfo *FFI = MF.getFrameInfo();
      int FPIndex = FI->getFramePointerSaveIndex();
      assert(FPIndex && "No Frame Pointer Save Slot!");
      FPOffset = FFI->getObjectOffset(FPIndex);
    } else {
      FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
    }
  }

  // Save LR (through register 0) and the frame pointer register relative to
  // the incoming stack pointer, before the frame is allocated.
  // NOTE(review): the STD immediates are the byte offsets divided by 4,
  // presumably because STD takes a word-scaled displacement -- confirm.
  if (isPPC64) {
    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);

    if (HasFP)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
        .addReg(PPC::X31)
        .addImm(FPOffset/4)
        .addReg(PPC::X1);

    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
        .addReg(PPC::X0)
        .addImm(LROffset / 4)
        .addReg(PPC::X1);
  } else {
    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);

    if (HasFP)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
        .addReg(PPC::R31)
        .addImm(FPOffset)
        .addReg(PPC::R1);

    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
        .addReg(PPC::R0)
        .addImm(LROffset)
        .addReg(PPC::R1);
  }

  // Skip if a leaf routine.
  if (!FrameSize) return;

  // Get stack alignments.
  unsigned TargetAlign = getStackAlignment();
  unsigned MaxAlign = MFI->getMaxAlignment();

  // Adjust stack pointer: r1 += NegFrameSize.
  // If there is a preferred stack alignment, align R1 now
  // Three cases per word size: the realigning form (taken only when
  // ALIGN_STACK is non-zero), a single store-with-update when NegFrameSize
  // fits in 16 bits, and otherwise the size materialized in register 0
  // followed by an indexed store-with-update.
  if (!isPPC64) {
    // PPC32.
    if (ALIGN_STACK && MaxAlign > TargetAlign) {
      assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
             "Invalid alignment!");
      assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");

      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
        .addReg(PPC::R1)
        .addImm(0)
        .addImm(32 - Log2_32(MaxAlign))
        .addImm(31);
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
        .addReg(PPC::R0, RegState::Kill)
        .addImm(NegFrameSize);
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
        .addReg(PPC::R1)
        .addReg(PPC::R1)
        .addReg(PPC::R0);
    } else if (isInt<16>(NegFrameSize)) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
        .addReg(PPC::R1)
        .addImm(NegFrameSize)
        .addReg(PPC::R1);
    } else {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
        .addImm(NegFrameSize >> 16);
      BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
        .addReg(PPC::R0, RegState::Kill)
        .addImm(NegFrameSize & 0xFFFF);
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
        .addReg(PPC::R1)
        .addReg(PPC::R1)
        .addReg(PPC::R0);
    }
  } else {    // PPC64.
    if (ALIGN_STACK && MaxAlign > TargetAlign) {
      assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
             "Invalid alignment!");
      assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");

      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
        .addReg(PPC::X1)
        .addImm(0)
        .addImm(64 - Log2_32(MaxAlign));
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
        .addReg(PPC::X0)
        .addImm(NegFrameSize);
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
        .addReg(PPC::X1)
        .addReg(PPC::X1)
        .addReg(PPC::X0);
    } else if (isInt<16>(NegFrameSize)) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
        .addReg(PPC::X1)
        .addImm(NegFrameSize / 4)
        .addReg(PPC::X1);
    } else {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
        .addImm(NegFrameSize >> 16);
      BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
        .addReg(PPC::X0, RegState::Kill)
        .addImm(NegFrameSize & 0xFFFF);
      BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
        .addReg(PPC::X1)
        .addReg(PPC::X1)
        .addReg(PPC::X0);
    }
  }

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();

  // Add the "machine moves" for the instructions we generated above, but in
  // reverse order.
  if (needsFrameMoves) {
    // Mark effective beginning of when frame pointer becomes valid.
    FrameLabel = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);

    // Show update of SP.
    if (NegFrameSize) {
      MachineLocation SPDst(MachineLocation::VirtualFP);
      MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
    } else {
      MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
      Moves.push_back(MachineMove(FrameLabel, SP, SP));
    }

    if (HasFP) {
      MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
      MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    if (MustSaveLR) {
      MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
      MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
      Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
    }
  }

  MCSymbol *ReadyLabel = 0;

  // If there is a frame pointer, copy R1 into R31
  if (HasFP) {
    if (!isPPC64) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
        .addReg(PPC::R1)
        .addReg(PPC::R1);
    } else {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
        .addReg(PPC::X1)
        .addReg(PPC::X1);
    }

    if (needsFrameMoves) {
      ReadyLabel = MMI.getContext().CreateTempSymbol();

      // Mark effective beginning of when frame pointer is ready.
      BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);

      // NOTE(review): HasFP is always true on this path, so only the
      // X31/R31 alternative of the conditional below is ever selected.
      MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
                                    (isPPC64 ? PPC::X1 : PPC::R1));
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
    }
  }

  if (needsFrameMoves) {
    // Callee saved register moves are attached to the last label emitted.
    MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;

    // Add callee saved registers to move list.
    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
      int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
      unsigned Reg = CSI[I].getReg();
      // LR is described by the move added above; RM gets no move either.
      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;

      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
      // subregisters of CR2. We just need to emit a move of CR2.
      if (Reg == PPC::CR2LT || Reg == PPC::CR2GT || Reg == PPC::CR2EQ)
        continue;
      if (Reg == PPC::CR2UN)
        Reg = PPC::CR2;

      MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
      MachineLocation CSSrc(Reg);
      Moves.push_back(MachineMove(Label, CSDst, CSSrc));
    }
  }
}

/// emitEpilogue - Insert the epilog before the terminator of the returning
/// block MBB.  In order, this: restores the stack pointer, reloads LR and the
/// frame pointer register when they were saved, and then either pops the
/// caller-allocated area (fast calling convention with GuaranteedTailCallOpt)
/// or emits the real branch for a TCRETURN pseudo.
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no terminator");
  const PPCInstrInfo &TII =
    *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());

  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc dl;

  assert((RetOpcode == PPC::BLR ||
          RetOpcode == PPC::TCRETURNri ||
          RetOpcode == PPC::TCRETURNdi ||
          RetOpcode == PPC::TCRETURNai ||
          RetOpcode == PPC::TCRETURNri8 ||
          RetOpcode == PPC::TCRETURNdi8 ||
          RetOpcode == PPC::TCRETURNai8) &&
         "Can only insert epilog into returning blocks");

  // Get alignment info so we know how to restore r1
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned TargetAlign = getStackAlignment();
  unsigned MaxAlign = MFI->getMaxAlignment();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI->getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get operating system
  bool isDarwinABI = Subtarget.isDarwinABI();
  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  // Do we have a frame pointer for this function?
  bool HasFP = hasFP(MF);

  int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);

  // Same save-slot lookup as in emitPrologue.
  int FPOffset = 0;
  if (HasFP) {
    if (Subtarget.isSVR4ABI()) {
      MachineFrameInfo *FFI = MF.getFrameInfo();
      int FPIndex = FI->getFramePointerSaveIndex();
      assert(FPIndex && "No Frame Pointer Save Slot!");
      FPOffset = FFI->getObjectOffset(FPIndex);
    } else {
      FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
    }
  }

  bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
    RetOpcode == PPC::TCRETURNdi ||
    RetOpcode == PPC::TCRETURNai ||
    RetOpcode == PPC::TCRETURNri8 ||
    RetOpcode == PPC::TCRETURNdi8 ||
    RetOpcode == PPC::TCRETURNai8;

  // For a tail call, fold the stack adjustment carried by operand 1 of the
  // TCRETURN pseudo into the amount by which the stack pointer is restored.
  if (UsesTCRet) {
    int MaxTCRetDelta = FI->getTailCallSPDelta();
    MachineOperand &StackAdjust = MBBI->getOperand(1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");
    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int Delta = StackAdj - MaxTCRetDelta;
    assert((Delta >= 0) && "Delta must be positive");
    if (MaxTCRetDelta>0)
      FrameSize += (StackAdj +Delta);
    else
      FrameSize += StackAdj;
  }

  if (FrameSize) {
    // The loaded (or persistent) stack pointer value is offset by the 'stwu'
    // on entry to the function.  Add this offset back now.
    if (!isPPC64) {
      // If this function contained a fastcc call and GuaranteedTailCallOpt is
      // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
      // call which invalidates the stack pointer value in SP(0). So we use the
      // value of R31 in this case.
      if (FI->hasFastCall() && isInt<16>(FrameSize)) {
        assert(hasFP(MF) && "Expecting a valid the frame pointer.");
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
          .addReg(PPC::R31).addImm(FrameSize);
      } else if(FI->hasFastCall()) {
        BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
          .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
          .addReg(PPC::R0, RegState::Kill)
          .addImm(FrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
          .addReg(PPC::R1)
          .addReg(PPC::R31)
          .addReg(PPC::R0);
      } else if (isInt<16>(FrameSize) &&
                 (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
                 !MFI->hasVarSizedObjects()) {
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
          .addReg(PPC::R1).addImm(FrameSize);
      } else {
        // Otherwise reload the stack pointer from offset 0 off itself.
        BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
          .addImm(0).addReg(PPC::R1);
      }
    } else {
      if (FI->hasFastCall() && isInt<16>(FrameSize)) {
        assert(hasFP(MF) && "Expecting a valid the frame pointer.");
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
          .addReg(PPC::X31).addImm(FrameSize);
      } else if(FI->hasFastCall()) {
        BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
          .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
          .addReg(PPC::X0, RegState::Kill)
          .addImm(FrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
          .addReg(PPC::X1)
          .addReg(PPC::X31)
          .addReg(PPC::X0);
      // NOTE(review): the PPC32 path above tests
      // (!ALIGN_STACK || TargetAlign >= MaxAlign); this path omits the
      // ALIGN_STACK term, so it takes the reload below whenever
      // MaxAlign > TargetAlign.
      } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
            !MFI->hasVarSizedObjects()) {
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
          .addReg(PPC::X1).addImm(FrameSize);
      } else {
        BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
          .addImm(0).addReg(PPC::X1);
      }
    }
  }

  // Reload LR (through register 0) and the frame pointer register from their
  // save slots, now addressed off the restored stack pointer.
  if (isPPC64) {
    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
        .addImm(LROffset/4).addReg(PPC::X1);

    if (HasFP)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
        .addImm(FPOffset/4).addReg(PPC::X1);

    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
  } else {
    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
          .addImm(LROffset).addReg(PPC::R1);

    if (HasFP)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
          .addImm(FPOffset).addReg(PPC::R1);

    if (MustSaveLR)
      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
  }

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization
  if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
      MF.getFunction()->getCallingConv() == CallingConv::Fast) {
     // NOTE(review): this FI shadows the identical FI declared above.
     PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
     unsigned CallerAllocatedAmt = FI->getMinReservedArea();
     unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
     unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
     unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
     unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
     unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
     unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
     unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;

     if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
       BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
         .addReg(StackReg).addImm(CallerAllocatedAmt);
     } else {
       // This path is also taken when CallerAllocatedAmt is zero.
       BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
          .addImm(CallerAllocatedAmt >> 16);
       BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
          .addReg(TmpReg, RegState::Kill)
          .addImm(CallerAllocatedAmt & 0xFFFF);
       BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
          .addReg(StackReg)
          .addReg(FPReg)
          .addReg(TmpReg);
     }
  } else if (RetOpcode == PPC::TCRETURNdi) {
    // Each TCRETURN pseudo gets the matching tail-branch instruction inserted
    // in front of it, built from operand 0 of the pseudo.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

/// spillsCR - Return the isCRSpilled() flag recorded in the PPCFunctionInfo
/// of MF.
static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// processFunctionBeforeCalleeSavedScan - Record whether LR must be saved,
/// create the fixed frame object for the frame pointer save slot when one is
/// needed and not yet defined, reserve space for moving the linkage area on
/// tail calls, and reserve a register scavenging slot when required.
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                   RegScavenger *RS) const {
  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();

  //  Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  MF.getRegInfo().setPhysRegUnused(LR);

  //  Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI  = Subtarget.isDarwinABI();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.
  // FIXME: this doesn't actually check stack size, so is a bit pessimistic
  // FIXME: doesn't detect whether or not we need to spill vXX, which requires
  //        r0 for now.

  if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
    if (needsFP(MF) || spillsCR(MF)) {
      const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
      const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
      const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
      RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                         RC->getAlignment(),
                                                         false));
    }
}

/// processFunctionBeforeFrameFinalized - For the SVR4 ABI only, move the
/// callee saved register spill slots to their final offsets.  The save areas
/// are stacked downwards in this order: floating-point registers, general
/// registers (including the frame pointer slot), CR, VRSAVE, and finally the
/// 16-byte aligned vector registers.  LowerBound tracks the lowest offset
/// handed out so far.
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
                                                                        const {
  // Early exit if not using the SVR4 ABI.
  if (!Subtarget.isSVR4ABI())
    return;

  // Get callee saved register information.
  MachineFrameInfo *FFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    return;
  }

  // Lowest register seen so far in each class; starts at register 31.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasCRSaveArea = false;
  bool HasVRSAVESaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  // Sort the callee saved registers by class and note the lowest of each.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (PPC::GPRCRegisterClass->contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(CSI[i]);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegisterClass->contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(CSI[i]);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegisterClass->contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(CSI[i]);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
// FIXME SVR4: Disable CR save area for now.
    } else if (PPC::CRBITRCRegisterClass->contains(Reg)
               || PPC::CRRCRegisterClass->contains(Reg)) {
//      HasCRSaveArea = true;
    } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
      HasVRSAVESaveArea = true;
    } else if (PPC::VRRCRegisterClass->contains(Reg)) {
      HasVRSaveArea = true;

      VRegs.push_back(CSI[i]);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();

  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
    }

    // The area spans from the lowest saved FPR up to F31, 8 bytes each.
    LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    HasGPSaveArea = true;

    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");

    FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      int FI = GPRegs[i].getFrameIdx();

      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
    }

    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      int FI = G8Regs[i].getFrameIdx();

      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
    }

    unsigned MinReg =
      std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
                         getPPCRegisterNumbering(MinG8R));

    // The area spans from the lowest saved general register up to register
    // 31, at 8 bytes per register on PPC64 and 4 bytes otherwise.
    if (Subtarget.isPPC64()) {
      LowerBound -= (31 - MinReg + 1) * 8;
    } else {
      LowerBound -= (31 - MinReg + 1) * 4;
    }
  }

  // The CR save area is below the general register save area.
  // NOTE(review): HasCRSaveArea is never set to true (the assignment in the
  // classification loop above is commented out), so this branch is not taken.
  if (HasCRSaveArea) {
    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
    //             which have the CR/CRBIT register class?
    // Adjust the frame index of the CR spill slot.
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();

      if (PPC::CRBITRCRegisterClass->contains(Reg) ||
          PPC::CRRCRegisterClass->contains(Reg)) {
        int FI = CSI[i].getFrameIdx();

        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  if (HasVRSAVESaveArea) {
    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
    //             which have the VRSAVE register class?
    // Adjust the frame index of the VRSAVE spill slot.
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();

      if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
        int FI = CSI[i].getFrameIdx();

        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
      }
    }

    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
  }

  if (HasVRSaveArea) {
    // Insert alignment padding, we need 16-byte alignment.
    LowerBound = (LowerBound - 15) & ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
    }
  }
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 0000000..20faa71
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,321 @@
//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#ifndef POWERPC_FRAMEINFO_H
#define POWERPC_FRAMEINFO_H

#include "PPC.h"
#include "PPCSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/STLExtras.h"

namespace llvm {
  class PPCSubtarget;

class PPCFrameLowering: public TargetFrameLowering {
  const PPCSubtarget &Subtarget;

public:
  PPCFrameLowering(const PPCSubtarget &sti)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
      Subtarget(sti) {
  }

  void determineFrameLayout(MachineFunction &MF) const;

  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
  /// the function.
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const; + bool needsFP(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const { return true; } + + /// getReturnSaveOffset - Return the previous frame offset to save the + /// return address. + static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) { + if (isDarwinABI) + return isPPC64 ? 16 : 8; + // SVR4 ABI: + return isPPC64 ? 16 : 4; + } + + /// getFramePointerSaveOffset - Return the previous frame offset to save the + /// frame pointer. + static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { + // For the Darwin ABI: + // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area + // for saving the frame pointer (if needed.) While the published ABI has + // not used this slot since at least MacOSX 10.2, there is older code + // around that does use it, and that needs to continue to work. + if (isDarwinABI) + return isPPC64 ? -8U : -4U; + + // SVR4 ABI: First slot in the general register save area. + return isPPC64 ? -8U : -4U; + } + + /// getLinkageSize - Return the size of the PowerPC ABI linkage area. + /// + static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { + if (isDarwinABI || isPPC64) + return 6 * (isPPC64 ? 8 : 4); + + // SVR4 ABI: + return 8; + } + + /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI + /// argument area. 
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) { + // For the Darwin ABI / 64-bit SVR4 ABI: + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if it's varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + if (isDarwinABI || isPPC64) + return 8 * (isPPC64 ? 8 : 4); + + // 32-bit SVR4 ABI: + // There is no default stack allocated for the 8 first GPR arguments. + return 0; + } + + /// getMinCallFrameSize - Return the minimum size a call frame can be using + /// the PowerPC ABI. + static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) { + // The call frame needs to be at least big enough for linkage and 8 args. + return getLinkageSize(isPPC64, isDarwinABI) + + getMinCallArgumentsSize(isPPC64, isDarwinABI); + } + + // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. + const SpillSlot * + getCalleeSavedSpillSlots(unsigned &NumEntries) const { + if (Subtarget.isDarwinABI()) { + NumEntries = 1; + if (Subtarget.isPPC64()) { + static const SpillSlot darwin64Offsets = {PPC::X31, -8}; + return &darwin64Offsets; + } else { + static const SpillSlot darwinOffsets = {PPC::R31, -4}; + return &darwinOffsets; + } + } + + // Early exit if not using the SVR4 ABI. + if (!Subtarget.isSVR4ABI()) { + NumEntries = 0; + return 0; + } + + static const SpillSlot Offsets[] = { + // Floating-point register save area offsets. 
+ {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. + {PPC::R31, -4}, + {PPC::R30, -8}, + {PPC::R29, -12}, + {PPC::R28, -16}, + {PPC::R27, -20}, + {PPC::R26, -24}, + {PPC::R25, -28}, + {PPC::R24, -32}, + {PPC::R23, -36}, + {PPC::R22, -40}, + {PPC::R21, -44}, + {PPC::R20, -48}, + {PPC::R19, -52}, + {PPC::R18, -56}, + {PPC::R17, -60}, + {PPC::R16, -64}, + {PPC::R15, -68}, + {PPC::R14, -72}, + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. +// {PPC::CR2, -4}, +// {PPC::CR3, -4}, +// {PPC::CR4, -4}, +// {PPC::CR2LT, -4}, +// {PPC::CR2GT, -4}, +// {PPC::CR2EQ, -4}, +// {PPC::CR2UN, -4}, +// {PPC::CR3LT, -4}, +// {PPC::CR3GT, -4}, +// {PPC::CR3EQ, -4}, +// {PPC::CR3UN, -4}, +// {PPC::CR4LT, -4}, +// {PPC::CR4GT, -4}, +// {PPC::CR4EQ, -4}, +// {PPC::CR4UN, -4}, + + // VRSAVE save area offset. + {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192} + }; + + static const SpillSlot Offsets64[] = { + // Floating-point register save area offsets. + {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. 
+ // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit + // mode? + {PPC::R31, -4}, + {PPC::R30, -12}, + {PPC::R29, -20}, + {PPC::R28, -28}, + {PPC::R27, -36}, + {PPC::R26, -44}, + {PPC::R25, -52}, + {PPC::R24, -60}, + {PPC::R23, -68}, + {PPC::R22, -76}, + {PPC::R21, -84}, + {PPC::R20, -92}, + {PPC::R19, -100}, + {PPC::R18, -108}, + {PPC::R17, -116}, + {PPC::R16, -124}, + {PPC::R15, -132}, + {PPC::R14, -140}, + + {PPC::X31, -8}, + {PPC::X30, -16}, + {PPC::X29, -24}, + {PPC::X28, -32}, + {PPC::X27, -40}, + {PPC::X26, -48}, + {PPC::X25, -56}, + {PPC::X24, -64}, + {PPC::X23, -72}, + {PPC::X22, -80}, + {PPC::X21, -88}, + {PPC::X20, -96}, + {PPC::X19, -104}, + {PPC::X18, -112}, + {PPC::X17, -120}, + {PPC::X16, -128}, + {PPC::X15, -136}, + {PPC::X14, -144}, + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. +// {PPC::CR2, -4}, +// {PPC::CR3, -4}, +// {PPC::CR4, -4}, +// {PPC::CR2LT, -4}, +// {PPC::CR2GT, -4}, +// {PPC::CR2EQ, -4}, +// {PPC::CR2UN, -4}, +// {PPC::CR3LT, -4}, +// {PPC::CR3GT, -4}, +// {PPC::CR3EQ, -4}, +// {PPC::CR3UN, -4}, +// {PPC::CR4LT, -4}, +// {PPC::CR4GT, -4}, +// {PPC::CR4EQ, -4}, +// {PPC::CR4UN, -4}, + + // VRSAVE save area offset. 
+ {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192} + }; + + if (Subtarget.isPPC64()) { + NumEntries = array_lengthof(Offsets64); + + return Offsets64; + } else { + NumEntries = array_lengthof(Offsets); + + return Offsets; + } + } +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp new file mode 100644 index 0000000..cddc9d8 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -0,0 +1,308 @@ +//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizers for scheduling on PowerPC processors. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "PPCHazardRecognizers.h" +#include "PPC.h" +#include "PPCInstrInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// PowerPC 970 Hazard Recognizer +// +// This models the dispatch group formation of the PPC970 processor. Dispatch +// groups are bundles of up to five instructions that can contain various mixes +// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one +// branch instruction per-cycle. 
+// +// There are a number of restrictions to dispatch group formation: some +// instructions can only be issued in the first slot of a dispatch group, & some +// instructions fill an entire dispatch group. Additionally, only branches can +// issue in the 5th (last) slot. +// +// Finally, there are a number of "structural" hazards on the PPC970. These +// conditions cause large performance penalties due to misprediction, recovery, +// and replay logic that has to happen. These cases include setting a CTR and +// branching through it in the same dispatch group, and storing to an address, +// then loading from the same address within a dispatch group. To avoid these +// conditions, we insert no-op instructions when appropriate. +// +// FIXME: This is missing some significant cases: +// 1. Modeling of microcoded instructions. +// 2. Handling of serialized operations. +// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". +// + +PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) + : TII(tii) { + EndDispatchGroup(); +} + +void PPCHazardRecognizer970::EndDispatchGroup() { + DEBUG(errs() << "=== Start of dispatch group\n"); + NumIssued = 0; + + // Structural hazard info. 
+ HasCTRSet = false; + NumStores = 0; +} + + +PPCII::PPC970_Unit +PPCHazardRecognizer970::GetInstrType(unsigned Opcode, + bool &isFirst, bool &isSingle, + bool &isCracked, + bool &isLoad, bool &isStore) { + if ((int)Opcode >= 0) { + isFirst = isSingle = isCracked = isLoad = isStore = false; + return PPCII::PPC970_Pseudo; + } + Opcode = ~Opcode; + + const MCInstrDesc &MCID = TII.get(Opcode); + + isLoad = MCID.mayLoad(); + isStore = MCID.mayStore(); + + uint64_t TSFlags = MCID.TSFlags; + + isFirst = TSFlags & PPCII::PPC970_First; + isSingle = TSFlags & PPCII::PPC970_Single; + isCracked = TSFlags & PPCII::PPC970_Cracked; + return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); +} + +/// isLoadOfStoredAddress - If we have a load from the previously stored pointer +/// as indicated by StorePtr1/StorePtr2/StoreSize, return true. +bool PPCHazardRecognizer970:: +isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { + for (unsigned i = 0, e = NumStores; i != e; ++i) { + // Handle exact and commuted addresses. + if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) + return true; + if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) + return true; + + // Okay, we don't have an exact match, if this is an indexed offset, see if + // we have overlap (which happens during fp->int conversion for example). + if (StorePtr2[i] == Ptr2) { + if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i])) + if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) { + // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check + // to see if the load and store actually overlap. 
+ int StoreOffs = StoreOffset->getZExtValue(); + int LoadOffs = LoadOffset->getZExtValue(); + if (StoreOffs < LoadOffs) { + if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; + } else { + if (int(LoadOffs+LoadSize) > StoreOffs) return true; + } + } + } + } + return false; +} + +/// getHazardType - We return hazard for any non-branch instruction that would +/// terminate the dispatch group. We turn NoopHazard for any +/// instructions that wouldn't terminate the dispatch group that would cause a +/// pipeline flush. +ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: +getHazardType(SUnit *SU, int Stalls) { + assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); + + const SDNode *Node = SU->getNode()->getGluedMachineNode(); + bool isFirst, isSingle, isCracked, isLoad, isStore; + PPCII::PPC970_Unit InstrType = + GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + isLoad, isStore); + if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; + unsigned Opcode = Node->getMachineOpcode(); + + // We can only issue a PPC970_First/PPC970_Single instruction (such as + // crand/mtspr/etc) if this is the first cycle of the dispatch group. + if (NumIssued != 0 && (isFirst || isSingle)) + return Hazard; + + // If this instruction is cracked into two ops by the decoder, we know that + // it is not a branch and that it cannot issue if 3 other instructions are + // already in the dispatch group. + if (isCracked && NumIssued > 2) + return Hazard; + + switch (InstrType) { + default: llvm_unreachable("Unknown instruction type!"); + case PPCII::PPC970_FXU: + case PPCII::PPC970_LSU: + case PPCII::PPC970_FPU: + case PPCII::PPC970_VALU: + case PPCII::PPC970_VPERM: + // We can only issue a branch as the last instruction in a group. + if (NumIssued == 4) return Hazard; + break; + case PPCII::PPC970_CRU: + // We can only issue a CR instruction in the first two slots. 
+ if (NumIssued >= 2) return Hazard; + break; + case PPCII::PPC970_BRU: + break; + } + + // Do not allow MTCTR and BCTRL to be in the same dispatch group. + if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) + return NoopHazard; + + // If this is a load following a store, make sure it's not to the same or + // overlapping address. + if (isLoad && NumStores) { + unsigned LoadSize; + switch (Opcode) { + default: llvm_unreachable("Unknown load!"); + case PPC::LBZ: case PPC::LBZU: + case PPC::LBZX: + case PPC::LBZ8: case PPC::LBZU8: + case PPC::LBZX8: + case PPC::LVEBX: + LoadSize = 1; + break; + case PPC::LHA: case PPC::LHAU: + case PPC::LHAX: + case PPC::LHZ: case PPC::LHZU: + case PPC::LHZX: + case PPC::LVEHX: + case PPC::LHBRX: + case PPC::LHA8: case PPC::LHAU8: + case PPC::LHAX8: + case PPC::LHZ8: case PPC::LHZU8: + case PPC::LHZX8: + LoadSize = 2; + break; + case PPC::LFS: case PPC::LFSU: + case PPC::LFSX: + case PPC::LWZ: case PPC::LWZU: + case PPC::LWZX: + case PPC::LWA: + case PPC::LWAX: + case PPC::LVEWX: + case PPC::LWBRX: + case PPC::LWZ8: + case PPC::LWZX8: + LoadSize = 4; + break; + case PPC::LFD: case PPC::LFDU: + case PPC::LFDX: + case PPC::LD: case PPC::LDU: + case PPC::LDX: + LoadSize = 8; + break; + case PPC::LVX: + case PPC::LVXL: + LoadSize = 16; + break; + } + + if (isLoadOfStoredAddress(LoadSize, + Node->getOperand(0), Node->getOperand(1))) + return NoopHazard; + } + + return NoHazard; +} + +void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { + const SDNode *Node = SU->getNode()->getGluedMachineNode(); + bool isFirst, isSingle, isCracked, isLoad, isStore; + PPCII::PPC970_Unit InstrType = + GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + isLoad, isStore); + if (InstrType == PPCII::PPC970_Pseudo) return; + unsigned Opcode = Node->getMachineOpcode(); + + // Update structural hazard information. + if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; + + // Track the address stored to. 
+ if (isStore) { + unsigned ThisStoreSize; + switch (Opcode) { + default: llvm_unreachable("Unknown store instruction!"); + case PPC::STB: case PPC::STB8: + case PPC::STBU: case PPC::STBU8: + case PPC::STBX: case PPC::STBX8: + case PPC::STVEBX: + ThisStoreSize = 1; + break; + case PPC::STH: case PPC::STH8: + case PPC::STHU: case PPC::STHU8: + case PPC::STHX: case PPC::STHX8: + case PPC::STVEHX: + case PPC::STHBRX: + ThisStoreSize = 2; + break; + case PPC::STFS: + case PPC::STFSU: + case PPC::STFSX: + case PPC::STWX: case PPC::STWX8: + case PPC::STWUX: + case PPC::STW: case PPC::STW8: + case PPC::STWU: + case PPC::STVEWX: + case PPC::STFIWX: + case PPC::STWBRX: + ThisStoreSize = 4; + break; + case PPC::STD_32: + case PPC::STDX_32: + case PPC::STD: + case PPC::STDU: + case PPC::STFD: + case PPC::STFDX: + case PPC::STDX: + case PPC::STDUX: + ThisStoreSize = 8; + break; + case PPC::STVX: + case PPC::STVXL: + ThisStoreSize = 16; + break; + } + + StoreSize[NumStores] = ThisStoreSize; + StorePtr1[NumStores] = Node->getOperand(1); + StorePtr2[NumStores] = Node->getOperand(2); + ++NumStores; + } + + if (InstrType == PPCII::PPC970_BRU || isSingle) + NumIssued = 4; // Terminate a d-group. + ++NumIssued; + + // If this instruction is cracked into two ops by the decoder, remember that + // we issued two pieces. 
+ if (isCracked) + ++NumIssued; + + if (NumIssued == 5) + EndDispatchGroup(); +} + +void PPCHazardRecognizer970::AdvanceCycle() { + assert(NumIssued < 5 && "Illegal dispatch group!"); + ++NumIssued; + if (NumIssued == 5) + EndDispatchGroup(); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h new file mode 100644 index 0000000..2f81f0f --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -0,0 +1,73 @@ +//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling on PowerPC processors. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCHAZRECS_H +#define PPCHAZRECS_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "PPCInstrInfo.h" + +namespace llvm { + +/// PPCHazardRecognizer970 - This class defines a finite state automata that +/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This +/// promotes good dispatch group formation and implements noop insertion to +/// avoid structural hazards that cause significant performance penalties (e.g. +/// setting the CTR register then branching through it within a dispatch group), +/// or storing then loading from the same address within a dispatch group. +class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { + const TargetInstrInfo &TII; + + unsigned NumIssued; // Number of insts issued, including advanced cycles. + + // Various things that can cause a structural hazard. + + // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. 
+ bool HasCTRSet; + + // StoredPtr - Keep track of the address of any store. If we see a load from + // the same address (or one that aliases it), disallow the store. We can have + // up to four stores in one dispatch group, hence we track up to 4. + // + // This is null if we haven't seen a store yet. We keep track of both + // operands of the store here, since we support [r+r] and [r+i] addressing. + SDValue StorePtr1[4], StorePtr2[4]; + unsigned StoreSize[4]; + unsigned NumStores; + +public: + PPCHazardRecognizer970(const TargetInstrInfo &TII); + virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + +private: + /// EndDispatchGroup - Called when we are finishing a new dispatch group. + /// + void EndDispatchGroup(); + + /// GetInstrType - Classify the specified powerpc opcode according to its + /// pipeline. + PPCII::PPC970_Unit GetInstrType(unsigned Opcode, + bool &isFirst, bool &isSingle,bool &isCracked, + bool &isLoad, bool &isStore); + + bool isLoadOfStoredAddress(unsigned LoadSize, + SDValue Ptr1, SDValue Ptr2) const; +}; + +} // end namespace llvm + +#endif + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp new file mode 100644 index 0000000..6f204cc --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -0,0 +1,1087 @@ +//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for PowerPC, +// converting from a legalized dag to a PPC dag. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ppc-codegen" +#include "PPC.h" +#include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/GlobalValue.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + //===--------------------------------------------------------------------===// + /// PPCDAGToDAGISel - PPC specific code to select PPC machine + /// instructions for SelectionDAG operations. + /// + class PPCDAGToDAGISel : public SelectionDAGISel { + const PPCTargetMachine &TM; + const PPCTargetLowering &PPCLowering; + const PPCSubtarget &PPCSubTarget; + unsigned GlobalBaseReg; + public: + explicit PPCDAGToDAGISel(PPCTargetMachine &tm) + : SelectionDAGISel(tm), TM(tm), + PPCLowering(*TM.getTargetLowering()), + PPCSubTarget(*TM.getSubtargetImpl()) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + SelectionDAGISel::runOnMachineFunction(MF); + + InsertVRSaveCode(MF); + return true; + } + + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. 
+ inline SDValue getI64Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i64); + } + + /// getSmallIPtrImm - Return a target constant of pointer type. + inline SDValue getSmallIPtrImm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy()); + } + + /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s + /// with any number of 0s on either side. The 1s are allowed to wrap from + /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. + /// 0x0F0F0000 is not, since all 1s are not contiguous. + static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME); + + + /// isRotateAndMask - Returns true if Mask and Shift can be folded into a + /// rotate and mask opcode and mask operation. + static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, + unsigned &SH, unsigned &MB, unsigned &ME); + + /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC + /// base register. Return the virtual register that holds this value. + SDNode *getGlobalBaseReg(); + + // Select - Convert the specified operand from a target-independent to a + // target-specific node if it hasn't already been changed. + SDNode *Select(SDNode *N); + + SDNode *SelectBitfieldInsert(SDNode *N); + + /// SelectCC - Select a comparison of the specified values with the + /// specified condition code, returning the CR# of the expression. + SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl); + + /// SelectAddrImm - Returns true if the address N can be represented by + /// a base register plus a signed 16-bit displacement [r+imm]. + bool SelectAddrImm(SDValue N, SDValue &Disp, + SDValue &Base) { + return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG); + } + + /// SelectAddrImmOffs - Return true if the operand is valid for a preinc + /// immediate field. Because preinc imms have already been validated, just + /// accept it. 
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { + Out = N; + return true; + } + + /// SelectAddrIdx - Given the specified addressed, check to see if it can be + /// represented as an indexed [r+r] operation. Returns false if it can + /// be represented by [r+imm], which are preferred. + bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { + return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); + } + + /// SelectAddrIdxOnly - Given the specified addressed, force it to be + /// represented as an indexed [r+r] operation. + bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { + return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); + } + + /// SelectAddrImmShift - Returns true if the address N can be represented by + /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable + /// for use by STD and friends. + bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) { + return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); + } + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. It is always correct to compute the value into + /// a register. The case of adding a (possibly relocatable) constant to a + /// register can be improved, but it is wrong to substitute Reg+Reg for + /// Reg in an asm, because the load or store opcode would have to change. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + OutOps.push_back(Op); + return false; + } + + void InsertVRSaveCode(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PowerPC DAG->DAG Pattern Instruction Selection"; + } + +// Include the pieces autogenerated from the target description. 
+#include "PPCGenDAGISel.inc" + +private: + SDNode *SelectSETCC(SDNode *N); + }; +} + +/// InsertVRSaveCode - Once the entire function has been instruction selected, +/// all virtual registers are created and all machine instructions are built, +/// check to see if we need to save/restore VRSAVE. If so, do it. +void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { + // Check to see if this function uses vector registers, which means we have to + // save and restore the VRSAVE register and update it with the regs we use. + // + // In this case, there will be virtual registers of vector type created + // by the scheduler. Detect them now. + bool HasVectorVReg = false; + for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { + HasVectorVReg = true; + break; + } + } + if (!HasVectorVReg) return; // nothing to do. + + // If we have a vector register, we want to emit code into the entry and exit + // blocks to save and restore the VRSAVE register. We do this here (instead + // of marking all vector instructions as clobbering VRSAVE) for two reasons: + // + // 1. This (trivially) reduces the load on the register allocator, by not + // having to represent the live range of the VRSAVE register. + // 2. This (more significantly) allows us to create a temporary virtual + // register to hold the saved VRSAVE value, allowing this temporary to be + // register allocated, instead of forcing it to be spilled to the stack. + + // Create two vregs - one to hold the VRSAVE register that is live-in to the + // function and one for the value after having bits or'd into it. 
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + + const TargetInstrInfo &TII = *TM.getInstrInfo(); + MachineBasicBlock &EntryBB = *Fn.begin(); + DebugLoc dl; + // Emit the following code into the entry block: + // InVRSAVE = MFVRSAVE + // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE + // MTVRSAVE UpdatedVRSAVE + MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point + BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); + BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), + UpdatedVRSAVE).addReg(InVRSAVE); + BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); + + // Find all return blocks, outputting a restore in each epilog. + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + if (!BB->empty() && BB->back().getDesc().isReturn()) { + IP = BB->end(); --IP; + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = IP; + while (I2 != BB->begin() && (--I2)->getDesc().isTerminator()) + IP = I2; + + // Emit: MTVRSAVE InVRSave + BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); + } + } +} + + +/// getGlobalBaseReg - Output the instructions required to put the +/// base address to use for accessing globals into a register. 
+/// +SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { + if (!GlobalBaseReg) { + const TargetInstrInfo &TII = *TM.getInstrInfo(); + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc dl; + + if (PPCLowering.getPointerTy() == MVT::i32) { + GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); + } else { + GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); + BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); + } + } + return CurDAG->getRegister(GlobalBaseReg, + PPCLowering.getPointerTy()).getNode(); +} + +/// isIntS16Immediate - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// sign extension from a 16-bit value. If so, this returns true and the +/// immediate. +static bool isIntS16Immediate(SDNode *N, short &Imm) { + if (N->getOpcode() != ISD::Constant) + return false; + + Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + else + return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); +} + +static bool isIntS16Immediate(SDValue Op, short &Imm) { + return isIntS16Immediate(Op.getNode(), Imm); +} + + +/// isInt32Immediate - This method tests to see if the node is a 32-bit constant +/// operand. If so Imm will receive the 32-bit value. 
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) { + if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { + Imm = cast<ConstantSDNode>(N)->getZExtValue(); + return true; + } + return false; +} + +/// isInt64Immediate - This method tests to see if the node is a 64-bit constant +/// operand. If so Imm will receive the 64-bit value. +static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { + if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { + Imm = cast<ConstantSDNode>(N)->getZExtValue(); + return true; + } + return false; +} + +// isInt32Immediate - This method tests to see if a constant operand. +// If so Imm will receive the 32 bit value. +static bool isInt32Immediate(SDValue N, unsigned &Imm) { + return isInt32Immediate(N.getNode(), Imm); +} + + +// isOpcWithIntImmediate - This method tests to see if the node is a specific +// opcode and that it has a immediate integer right operand. +// If so Imm will receive the 32 bit value. +static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { + return N->getOpcode() == Opc + && isInt32Immediate(N->getOperand(1).getNode(), Imm); +} + +bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { + if (isShiftedMask_32(Val)) { + // look for the first non-zero bit + MB = CountLeadingZeros_32(Val); + // look for the first zero bit after the run of ones + ME = CountLeadingZeros_32((Val - 1) ^ Val); + return true; + } else { + Val = ~Val; // invert mask + if (isShiftedMask_32(Val)) { + // effectively look for the first zero bit + ME = CountLeadingZeros_32(Val) - 1; + // effectively look for the first one bit after the run of zeros + MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1; + return true; + } + } + // no run present + return false; +} + +bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, + bool isShiftMask, unsigned &SH, + unsigned &MB, unsigned &ME) { + // Don't even go down this path for i64, since different logic will be + 
// necessary for rldicl/rldicr/rldimi. + if (N->getValueType(0) != MVT::i32) + return false; + + unsigned Shift = 32; + unsigned Indeterminant = ~0; // bit mask marking indeterminant results + unsigned Opcode = N->getOpcode(); + if (N->getNumOperands() != 2 || + !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) + return false; + + if (Opcode == ISD::SHL) { + // apply shift left to mask if it comes first + if (isShiftMask) Mask = Mask << Shift; + // determine which bits are made indeterminant by shift + Indeterminant = ~(0xFFFFFFFFu << Shift); + } else if (Opcode == ISD::SRL) { + // apply shift right to mask if it comes first + if (isShiftMask) Mask = Mask >> Shift; + // determine which bits are made indeterminant by shift + Indeterminant = ~(0xFFFFFFFFu >> Shift); + // adjust for the left rotate + Shift = 32 - Shift; + } else if (Opcode == ISD::ROTL) { + Indeterminant = 0; + } else { + return false; + } + + // if the mask doesn't intersect any Indeterminant bits + if (Mask && !(Mask & Indeterminant)) { + SH = Shift & 31; + // make sure the mask is still a mask (wrap arounds may not be) + return isRunOfOnes(Mask, MB, ME); + } + return false; +} + +/// SelectBitfieldInsert - turn an or of two masked values into +/// the rotate left word immediate then mask insert (rlwimi) instruction. 
SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
  // Returns the new RLWIMI machine node, or 0 when N does not match.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  DebugLoc dl = N->getDebugLoc();

  // NOTE(review): LKZ/RKZ and LKO/RKO are presumably the known-zero and
  // known-one bit sets of the left/right operand, judging by the names --
  // confirm against ComputeMaskedBits.  Only LKZ and RKZ are used below.
  APInt LKZ, LKO, RKZ, RKO;
  CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
  CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);

  unsigned TargetMask = LKZ.getZExtValue();
  unsigned InsertMask = RKZ.getZExtValue();

  // Only proceed when the two masks together cover all 32 bits.
  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    // From here on the masks are used in complemented form.
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to the
    // RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    // The inserted field must be a non-empty run of ones for rlwimi's MB/ME.
    if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
      // Tmp1 and Tmp2 are not referenced anywhere else in this function.
      SDValue Tmp1, Tmp2;

      // Fold a constant shift on the inserted value into the rotate amount; a
      // right shift by Value is encoded as a left rotate by 32 - Value.
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      // For an AND, look through it (and through a constant shift beneath it,
      // which is folded the same way as above).
      if (Op1Opc == ISD::AND) {
        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
          Op1 = Op1.getOperand(0).getOperand(0);
          SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        } else {
          Op1 = Op1.getOperand(0);
        }
      }

      // Reduce the rotate amount modulo 32 (32 - 0 would otherwise be 32).
      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
                          getI32Imm(ME) };
      return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
    }
  }
  return 0;
}

/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
///
/// The compare opcode is chosen from the type of LHS (i32, i64, f32, f64) and,
/// for integers, from whether CC is an equality, unsigned or signed compare.
/// When RHS is a constant that fits the 16-bit immediate form of the chosen
/// compare, the immediate form is emitted instead of a register compare.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
                                    ISD::CondCode CC, DebugLoc dl) {
  // Always select the LHS.
  unsigned Opc;

  if (LHS.getValueType() == MVT::i32) {
    unsigned Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt32Immediate(RHS, Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF)), 0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>((int)Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF)), 0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpw cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmplwi cr0,r0,0x5678
        //   beq cr0,L6
        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
                                           getI32Imm(Imm >> 16)), 0);
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
                                              getI32Imm(Imm & 0xFFFF)), 0);
      }
      Opc = PPC::CMPLW;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: only an unsigned 16-bit immediate can be folded.
      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                              getI32Imm(Imm & 0xFFFF)), 0);
      Opc = PPC::CMPLW;
    } else {
      // Signed compare: only a signed 16-bit immediate can be folded.
      short SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                              getI32Imm((int)SImm & 0xFFFF)),
                         0);
      Opc = PPC::CMPW;
    }
  } else if (LHS.getValueType() == MVT::i64) {
    // Same structure as the i32 case, using the doubleword compares.
    uint64_t Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt64Immediate(RHS.getNode(), Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF)), 0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF)), 0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpd cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmpldi cr0,r0,0x5678
        //   beq cr0,L6
        // The xoris/cmpldi pair only covers 32 bits of constant, hence the
        // extra range check here.
        if (isUInt<32>(Imm)) {
          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
                                             getI64Imm(Imm >> 16)), 0);
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
                                                getI64Imm(Imm & 0xFFFF)), 0);
        }
      }
      Opc = PPC::CMPLD;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                              getI64Imm(Imm & 0xFFFF)), 0);
      Opc = PPC::CMPLD;
    } else {
      short SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                              getI64Imm(SImm & 0xFFFF)),
                         0);
      Opc = PPC::CMPD;
    }
  } else if (LHS.getValueType() == MVT::f32) {
    Opc = PPC::FCMPUS;
  } else {
    assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
    Opc = PPC::FCMPUD;
  }
  // No immediate form applied: compare the two values directly.
  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}

/// getPredicateForSetCC - Map a SetCC condition code to the PPC::Predicate
/// used for it.  Each ordered/unordered FP variant listed shares the predicate
/// of the plain integer code next to it.  SETUEQ, SETONE, SETOLE and SETOGE
/// are not expected here (see the unreachable message below).
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
  switch (CC) {
  case ISD::SETUEQ:
  case ISD::SETONE:
  case ISD::SETOLE:
  case ISD::SETOGE:
    llvm_unreachable("Should be lowered by legalize!");
  default: llvm_unreachable("Unknown condition!");
  case ISD::SETOEQ:
  case ISD::SETEQ:  return PPC::PRED_EQ;
  case ISD::SETUNE:
  case ISD::SETNE:  return PPC::PRED_NE;
  case ISD::SETOLT:
  case ISD::SETLT:  return PPC::PRED_LT;
  case ISD::SETULE:
  case ISD::SETLE:  return PPC::PRED_LE;
  case ISD::SETOGT:
  case ISD::SETGT:  return PPC::PRED_GT;
  case ISD::SETUGE:
  case ISD::SETGE:  return PPC::PRED_GE;
  case ISD::SETO:   return PPC::PRED_NU;
  case ISD::SETUO:  return PPC::PRED_UN;
    // These two are invalid for floating point.  Assume we have int.
  case ISD::SETULT: return PPC::PRED_LT;
  case ISD::SETUGT: return PPC::PRED_GT;
  }
}

/// getCRIdxForSetCC - Return the index of the condition register field
/// associated with the SetCC condition, and whether or not the field is
/// treated as inverted.  That is, lt = 0; ge = 0 inverted.
///
/// If this returns with Other != -1, then the returned comparison is an or of
/// two simpler comparisons.  In this case, Invert is guaranteed to be false.
///
/// Note: in this implementation Other is only ever assigned -1.
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
  Invert = false;
  Other = -1;
  switch (CC) {
  default: llvm_unreachable("Unknown condition!");
  case ISD::SETOLT:
  case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
  case ISD::SETOGT:
  case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
  case ISD::SETOEQ:
  case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
  case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
  case ISD::SETUGE:
  case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
  case ISD::SETULE:
  case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
  case ISD::SETUNE:
  case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
  case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
  case ISD::SETUEQ:
  case ISD::SETOGE:
  case ISD::SETOLE:
  case ISD::SETONE:
    llvm_unreachable("Invalid branch code: should be expanded by legalize");
  // These are invalid for floating point.  Assume integer.
  case ISD::SETULT: return 0;
  case ISD::SETUGT: return 1;
  }
  return 0;
}

/// SelectSETCC - Select a SETCC node.  Comparisons of an i32 value against the
/// constants 0 and -1 are turned into short branch-free sequences for the
/// condition codes handled below; everything else goes through a real compare
/// (SelectCC), a move from the condition register, and a rotate that isolates
/// the relevant CR bit.
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  unsigned Imm;
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = (PtrVT == MVT::i64);

  if (isInt32Immediate(N->getOperand(1), Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // cntlzw yields 32 only for a zero input; rotating left by 27 and
        // keeping bit 31 extracts that "32" bit as the 0/1 result.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
      }
      case ISD::SETNE: {
        // Carry-based sequence (ADDIC -1 then SUBFE); skipped on PPC64.
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U)), 0);
        return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
                                    AD.getValue(1));
      }
      case ISD::SETLT: {
        // Rotate left by 1 and keep bit 31: the result is the sign bit.
        SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
      }
      case ISD::SETGT: {
        // Sign bit of (NEG Op) ANDC Op.
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        // ADDIC Op, 1 followed by ADDZE of a zero register; skipped on PPC64.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1)), 0);
        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                              SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                             MVT::i32,
                                                             getI32Imm(0)), 0),
                                      Op.getValue(1));
      case ISD::SETNE: {
        // NOR Op, Op (bitwise not), then the same ADDIC/SUBFE sequence as
        // "setne op, 0" above; skipped on PPC64.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U));
        return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
                                    Op, SDValue(AD, 1));
      }
      case ISD::SETLT: {
        // Sign bit of (Op + 1) AND Op.
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
      }
      case ISD::SETGT: {
        // Extract the sign bit, then flip it with XORI 1.
        SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
                     0);
        return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
                                    getI32Imm(1));
      }
      }
    }
  }

  // General case: do the compare, then pull the wanted bit out of the CR.
  bool Inv;
  int OtherCondIdx;
  unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
  SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
  SDValue IntCR;

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(0, 0);  // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  // MFOCRF is used when the subtarget reports isGigaProcessor() and only one
  // CR bit is needed; otherwise the MFCRpseud pseudo is used.
  if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
    IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                           CCReg), 0);
  else
    IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
                                           CR7Reg, CCReg), 0);

  // Rotate amount (32-(3-Idx)) & 31 with MB = ME = 31 leaves the selected
  // CR bit as the 0/1 result.
  SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
                      getI32Imm(31), getI32Imm(31) };
  if (OtherCondIdx == -1 && !Inv)
    return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);

  // Get the specified bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
  if (Inv) {
    assert(OtherCondIdx == -1 && "Can't have split plus negation");
    return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
  }

  // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
  // We already got the bit for the first part of the comparison (e.g. SETULE).

  // Get the other bit of the comparison.
  Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
  SDValue OtherCond =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);

  return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
}


// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
//
// Each case below hand-selects one opcode; a case that does not match breaks
// out of the switch and falls through to the generated SelectCode(N).
SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
  DebugLoc dl = N->getDebugLoc();
  if (N->isMachineOpcode())
    return NULL;   // Already selected.

  switch (N->getOpcode()) {
  default: break;

  // 64-bit constants are built from 16-bit pieces: LI8/LIS8/ORI8 for the
  // first 32 bits, an optional RLDICR shift, then ORIS8/ORI8 for the rest.
  case ISD::Constant: {
    if (N->getValueType(0) == MVT::i64) {
      // Get 64 bit value.
      int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
      // Assume no remaining bits.
      unsigned Remainder = 0;
      // Assume no shift required.
      unsigned Shift = 0;

      // If it can't be represented as a 32 bit value.
      if (!isInt<32>(Imm)) {
        Shift = CountTrailingZeros_64(Imm);
        int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

        // If the shifted value fits 32 bits.
        if (isInt<32>(ImmSh)) {
          // Go with the shifted value.
          Imm = ImmSh;
        } else {
          // Still stuck with a 64 bit value.
          // Remainder is unsigned, so it keeps only the low 32 bits of Imm.
          Remainder = Imm;
          Shift = 32;
          Imm >>= 32;
        }
      }

      // Intermediate operand.
      SDNode *Result;

      // Handle first 32 bits.
      unsigned Lo = Imm & 0xFFFF;
      unsigned Hi = (Imm >> 16) & 0xFFFF;

      // Simple value.
      if (isInt<16>(Imm)) {
       // Just the Lo bits.
        Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
      } else if (Lo) {
        // Handle the Hi bits.
        unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
        Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
        // And Lo bits.
        Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                        SDValue(Result, 0), getI32Imm(Lo));
      } else {
       // Just the Hi bits.
        Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
      }

      // If no shift, we're done.
      if (!Shift) return Result;

      // Shift for next step if the upper 32-bits were not zero.
      if (Imm) {
        Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
                                        SDValue(Result, 0),
                                        getI32Imm(Shift),
                                        getI32Imm(63 - Shift));
      }

      // Add in the last bits as required.
      if ((Hi = (Remainder >> 16) & 0xFFFF)) {
        Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                        SDValue(Result, 0), getI32Imm(Hi));
      }
      if ((Lo = Remainder & 0xFFFF)) {
        Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                        SDValue(Result, 0), getI32Imm(Lo));
      }

      return Result;
    }
    break;
  }

  case ISD::SETCC:
    return SelectSETCC(N);
  case PPCISD::GlobalBaseReg:
    return getGlobalBaseReg();

  // A frame index becomes ADDI/ADDI8 of the target frame index and 0.
  case ISD::FrameIndex: {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
    unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
    // With a single use the node is morphed in place; otherwise a new
    // machine node is created.
    if (N->hasOneUse())
      return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
                                  getSmallIPtrImm(0));
    return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
                                  getSmallIPtrImm(0));
  }

  case PPCISD::MFCR: {
    SDValue InFlag = N->getOperand(1);
    // Use MFOCRF if supported.
    if (PPCSubTarget.isGigaProcessor())
      return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
                                    N->getOperand(0), InFlag);
    else
      return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
                                    N->getOperand(0), InFlag);
  }

  // Signed division by +/- a power of two: SRAWI then ADDZE (consuming the
  // glue result of the SRAWI), followed by NEG for a negative divisor.
  case ISD::SDIV: {
    // FIXME: since this depends on the setting of the carry flag from the srawi
    //        we should really be making notes about that for the scheduler.
    // FIXME: It sure would be nice if we could cheaply recognize the
    //        srl/add/sra pattern the dag combiner will generate for this as
    //        sra/addze rather than having to handle sdiv ourselves.  oh well.
    unsigned Imm;
    if (isInt32Immediate(N->getOperand(1), Imm)) {
      SDValue N0 = N->getOperand(0);
      if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
        SDNode *Op =
          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
                                 N0, getI32Imm(Log2_32(Imm)));
        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                                    SDValue(Op, 0), SDValue(Op, 1));
      } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
        SDNode *Op =
          CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
                                 N0, getI32Imm(Log2_32(-Imm)));
        SDValue PT =
          SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
                                         SDValue(Op, 0), SDValue(Op, 1)),
                    0);
        return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
      }
    }

    // Other cases are autogenerated.
    break;
  }

  case ISD::LOAD: {
    // Handle preincrement loads.
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT LoadedVT = LD->getMemoryVT();

    // Normal loads are handled by code generated from the .td file.
    if (LD->getAddressingMode() != ISD::PRE_INC)
      break;

    SDValue Offset = LD->getOffset();
    if (isa<ConstantSDNode>(Offset) ||
        Offset.getOpcode() == ISD::TargetGlobalAddress) {

      // Choose the update-form load opcode from the result type and the
      // in-memory type.
      unsigned Opcode;
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
      if (LD->getValueType(0) != MVT::i64) {
        // Handle PPC32 integer and normal FP loads.
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::f64: Opcode = PPC::LFDU; break;
          case MVT::f32: Opcode = PPC::LFSU; break;
          case MVT::i32: Opcode = PPC::LWZU; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU; break;
        }
      } else {
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::i64: Opcode = PPC::LDU; break;
          case MVT::i32: Opcode = PPC::LWZU8; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU8; break;
        }
      }

      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Offset, Base, Chain };
      // FIXME: PPC64
      // Results: loaded value, pointer-typed value, chain.
      return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
                                    PPCLowering.getPointerTy(),
                                    MVT::Other, Ops, 3);
    } else {
      llvm_unreachable("R+R preindex loads not supported yet!");
    }
  }

  case ISD::AND: {
    unsigned Imm, Imm2, SH, MB, ME;

    // If this is an and of a value rotated between 0 and 31 bits and then and'd
    // with a mask, emit rlwinm
    if (isInt32Immediate(N->getOperand(1), Imm) &&
        isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
      SDValue Val = N->getOperand(0).getOperand(0);
      SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
    }
    // If this is just a masked value where the input is not handled above, and
    // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
    if (isInt32Immediate(N->getOperand(1), Imm) &&
        isRunOfOnes(Imm, MB, ME) &&
        N->getOperand(0).getOpcode() != ISD::ROTL) {
      SDValue Val = N->getOperand(0);
      SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
    }
    // AND X, 0 -> 0, not "rlwinm 32".
    if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
      ReplaceUses(SDValue(N, 0), N->getOperand(1));
      return NULL;
    }
    // ISD::OR doesn't get all the bitfield insertion fun.
    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
    if (isInt32Immediate(N->getOperand(1), Imm) &&
        N->getOperand(0).getOpcode() == ISD::OR &&
        isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
      // These MB/ME shadow the ones declared at the top of this case.
      unsigned MB, ME;
      Imm = ~(Imm^Imm2);
      if (isRunOfOnes(Imm, MB, ME)) {
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                            N->getOperand(0).getOperand(1),
                            getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
        return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
      }
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::OR:
    // Only i32 ORs are tried as a bitfield insert (rlwimi).
    if (N->getValueType(0) == MVT::i32)
      if (SDNode *I = SelectBitfieldInsert(N))
        return I;

    // Other cases are autogenerated.
    break;
  // (shl (and x, imm), c) and (srl (and x, imm), c) become one rlwinm when
  // the shifted mask is still a valid rotate-and-mask.
  case ISD::SHL: {
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::SRL: {
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::SELECT_CC: {
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
    EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
    bool isPPC64 = (PtrVT == MVT::i64);

    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
    if (!isPPC64)
      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
            if (N1C->isNullValue() && N3C->isNullValue() &&
                N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
                // FIXME: Implement this optzn for PPC64.
                N->getValueType(0) == MVT::i32) {
              SDNode *Tmp =
                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                       N->getOperand(0), getI32Imm(~0U));
              return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
                                          SDValue(Tmp, 0), N->getOperand(0),
                                          SDValue(Tmp, 1));
            }

    // General case: compare, then emit the SELECT_CC_* opcode for the
    // result type with the predicate as an immediate operand.
    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
    unsigned BROpc = getPredicateForSetCC(CC);

    unsigned SelectCCOp;
    if (N->getValueType(0) == MVT::i32)
      SelectCCOp = PPC::SELECT_CC_I4;
    else if (N->getValueType(0) == MVT::i64)
      SelectCCOp = PPC::SELECT_CC_I8;
    else if (N->getValueType(0) == MVT::f32)
      SelectCCOp = PPC::SELECT_CC_F4;
    else if (N->getValueType(0) == MVT::f64)
      SelectCCOp = PPC::SELECT_CC_F8;
    else
      SelectCCOp = PPC::SELECT_CC_VRRC;

    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
                        getI32Imm(BROpc) };
    return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
  }
  case PPCISD::COND_BRANCH: {
    // Op #0 is the Chain.
    // Op #1 is the PPC::PRED_* number.
    // Op #2 is the CR#
    // Op #3 is the Dest MBB
    // Op #4 is the Flag.
    // Prevent PPC::PRED_* from being selected into LI.
    SDValue Pred =
      getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
    SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
      N->getOperand(0), N->getOperand(4) };
    return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
  }
  case ISD::BR_CC: {
    // Operands read here: #0 chain, #1 condition code, #2/#3 the compared
    // values, #4 passed through as the third BCC operand.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
    SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
                        N->getOperand(4), N->getOperand(0) };
    return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
  }
  case ISD::BRIND: {
    // FIXME: Should custom lower this.
    SDValue Chain = N->getOperand(0);
    SDValue Target = N->getOperand(1);
    unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
    // Despite its name, Reg holds the branch opcode (BCTR/BCTR8) that is
    // passed to SelectNodeTo below.
    unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
    Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
                                           Chain), 0);
    return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
  }
  }

  return SelectCode(N);
}



/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
  return new PPCDAGToDAGISel(TM);
}

diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp new file mode 100644 index 0000000..d6b8a9e --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -0,0 +1,5793 @@
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

// Forward declarations of the static SVR4 calling-convention helpers.
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                     CCValAssign::LocInfo &LocInfo,
                                     ISD::ArgFlagsTy &ArgFlags,
                                     CCState &State);
static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State);
static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                              MVT &LocVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State);

// Hidden command-line switch for the experimental pre-increment load/store
// generation.
static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                     cl::Hidden);

/// CreateTLOF - Pick the object-file lowering for this target machine: Mach-O
/// when the subtarget is Darwin, ELF otherwise.  The object is heap-allocated
/// and handed to the TargetLowering base-class constructor.
/// NOTE(review): presumably the base class takes ownership - confirm.
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  return new TargetLoweringObjectFileELF();
}

/// PPCTargetLowering - Register the PowerPC register classes and tell the
/// legalizer, per operation and value type, whether a node is Legal, must be
/// Promoted, Expanded, or Custom lowered.  Several settings depend on
/// subtarget queries (isPPC64, has64BitSupport, use64BitRegs, hasAltivec,
/// hasFSQRT, isSVR4ABI, isDarwin).  Statement order matters: later calls
/// override earlier ones for the same (operation, type) pair, which the
/// Altivec section relies on.
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-increment loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/fmod/pow; FMA is expanded as well.  (sqrt is
  // handled separately below, depending on the subtarget.)
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA , MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Expand FSQRT unless the subtarget reports a hardware square root
  // (hasFSQRT).
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64 , Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);
  setOperationAction(ISD::ROTR, MVT::i64 , Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);


  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // VAARG is custom lowered with the 32-bit SVR4 ABI.
  if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
      && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set a default action for every vector type.  The statements after
    // the loop then override these defaults for the types and operations that
    // can be effectively codegen'd, so they must stay after the loop.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND , VT, Promote);
      AddPromotedToType (ISD::AND , VT, MVT::v4i32);
      setOperationAction(ISD::OR , VT, Promote);
      AddPromotedToType (ISD::OR , VT, MVT::v4i32);
      setOperationAction(ISD::XOR , VT, Promote);
      AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD , VT, Promote);
      AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // v4i32 is the type the bitwise/memory operations were promoted to above,
    // so make them legal there.
    setOperationAction(ISD::AND , MVT::v4i32, Legal);
    setOperationAction(ISD::OR , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  // Stack pointer and exception registers: the X* registers on PPC64, the
  // R* registers otherwise.
  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // NOTE(review): these alignment arguments look like log2 values (2 -> 4
  // bytes, 4 -> 16 bytes) - confirm against the TargetLowering API.
  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  setInsertFencesForAtomic(true);

  computeRegisterProperties();
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.  Currently this returns 4
/// on every path; the Darwin check only marks where the SVR4 value is still
/// to be decided, and Ty is not inspected.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  const TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;
  // FIXME SVR4 TBD
  return 4;
}

/// getTargetNodeName - Return the printable name of a PPCISD opcode, or null
/// for any opcode not listed here.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD: return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
  case PPCISD::STD_32: return "PPCISD::STD_32";
  case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
  case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
  case PPCISD::NOP: return "PPCISD::NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
  case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::MFCR: return "PPCISD::MFCR";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LARX: return "PPCISD::LARX";
  case PPCISD::STCX: return "PPCISD::STCX";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF: return "PPCISD::MTFSF";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  }
}

/// getSetCCResultType - The result of a setcc is always i32, whatever the
/// type being compared.
EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i32;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is a shuffle mask element, where a negative value
/// means undef.  Return true if Op is undef or if it matches the specified
/// value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.  In the binary form mask element i must select byte
/// 2*i+1 (or be undef); in the unary form the same eight selections must
/// appear in both halves of the mask.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { + if (!isUnary) { + for (unsigned i = 0; i != 16; i += 2) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) + return false; + } else { + for (unsigned i = 0; i != 8; i += 2) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+3)) + return false; + } + return true; +} + +/// isVMerge - Common function, used to match vmrg* shuffles. +/// +static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, + unsigned LHSStart, unsigned RHSStart) { + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); + assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && + "Unsupported merge size!"); + + for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units + for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit + if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), + LHSStart+j+i*UnitSize) || + !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), + RHSStart+j+i*UnitSize)) + return false; + } + return true; +} + +/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for +/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). +bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { + if (!isUnary) + return isVMerge(N, UnitSize, 8, 24); + return isVMerge(N, UnitSize, 8, 8); +} + +/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for +/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  // Binary form: left units are numbered from byte 0, right units from byte
  // 16.  Unary form: both sides are numbered from byte 0.
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive, wrapping
    // around within the single 16-byte input.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  // An undef (negative) first element becomes a large unsigned value here and
  // is rejected by the same test as a reference to the second vector.
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every later element must repeat the first one byte for byte; an element
  // whose first byte is undef is skipped entirely.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  // Once the vector is known to be a constant splat, only operand 0 is
  // inspected.
  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  // Convert the byte index of the splatted element into an element index.
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].  Returns a null SDValue when no such
/// splat exists.
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();


      // i&(Multiple-1) is this operand's position within its chunk.
      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  // Value stays 0 when OpVal is neither an integer nor an FP constant, which
  // the zero check further down turns into "no match".
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    // NOTE(review): the mask is built with an int shift, so this assumes
    // 8*ValSizeInBytes stays below 32 (elements of at most 4 bytes) - confirm
    // if wider vector element types ever reach here.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.  Imm is written for any ISD::Constant node, even when the
/// function returns false.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  // Truncate to 16 bits, then check that sign-extending back reproduces the
  // original value at the node's own width.
  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  // Base and Index are only written on the paths that return true.
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                          LHSKnownZero, LHSKnownOne);

    // Only look at the RHS if at least one LHS bit is known to be zero.
    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            APInt::getAllOnesValue(N.getOperand(1)
                                                     .getValueSizeInBits()),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG) const {
  // FIXME dl should come from parent load or store, not from address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.  Disp and Base are
  // passed as the outputs of that probe, so they may already have been
  // written when this returns false.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // The displacement is emitted as the low 16 bits of the immediate.
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);

      // Every bit set in imm must be a known-zero bit of the LHS.
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      // Subtract the sign-extended low half first so that the high half
      // compensates for a negative displacement.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: use the whole address as the base with a zero displacement.
  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.  Always returns true.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                         N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].
Suitable for use by STD and friends. +bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, + SDValue &Base, + SelectionDAG &DAG) const { + // FIXME dl should come from the parent load or store, not the address + DebugLoc dl = N.getDebugLoc(); + // If this can be more profitably realized as r+r, fail. + if (SelectAddressRegReg(N, Disp, Base, DAG)) + return false; + + if (N.getOpcode() == ISD::ADD) { + short imm = 0; + if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { + Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } + return true; // [r+i] + } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { + // Match LOAD (ADD (X, Lo(G))). + assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() + && "Cannot handle constant offsets yet!"); + Disp = N.getOperand(1).getOperand(0); // The global address. + assert(Disp.getOpcode() == ISD::TargetGlobalAddress || + Disp.getOpcode() == ISD::TargetConstantPool || + Disp.getOpcode() == ISD::TargetJumpTable); + Base = N.getOperand(0); + return true; // [&g+r] + } + } else if (N.getOpcode() == ISD::OR) { + short imm = 0; + if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { + // If this is an or of disjoint bitfields, we can codegen this as an add + // (for better address arithmetic) if the LHS and RHS of the OR are + // provably disjoint. + APInt LHSKnownZero, LHSKnownOne; + DAG.ComputeMaskedBits(N.getOperand(0), + APInt::getAllOnesValue(N.getOperand(0) + .getValueSizeInBits()), + LHSKnownZero, LHSKnownOne); + if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { + // If all of the bits are known zero on the LHS or RHS, the add won't + // carry. 
+ Base = N.getOperand(0); + Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); + return true; + } + } + } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { + // Loading from a constant address. Verify low two bits are clear. + if ((CN->getZExtValue() & 3) == 0) { + // If this address fits entirely in a 14-bit sext immediate field, codegen + // this as "d, 0" + short Imm; + if (isIntS16Immediate(CN, Imm)) { + Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + CN->getValueType(0)); + return true; + } + + // Fold the low-part of 32-bit absolute addresses into addr mode. + if (CN->getValueType(0) == MVT::i32 || + (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { + int Addr = (int)CN->getZExtValue(); + + // Otherwise, break this down into an LIS + disp. + Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); + Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); + unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; + Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0); + return true; + } + } + } + + Disp = DAG.getTargetConstant(0, getPointerTy()); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N; + return true; // [r+0] +} + + +/// getPreIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if the node's address +/// can be legally represented as pre-indexed load / store address. +bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + // Disabled by default for now. 
+ if (!EnablePPCPreinc) return false; + + SDValue Ptr; + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + Ptr = LD->getBasePtr(); + VT = LD->getMemoryVT(); + + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + Ptr = ST->getBasePtr(); + VT = ST->getMemoryVT(); + } else + return false; + + // PowerPC doesn't have preinc load/store instructions for vectors. + if (VT.isVector()) + return false; + + // TODO: Check reg+reg first. + + // LDU/STU use reg+imm*4, others use reg+imm. + if (VT != MVT::i64) { + // reg + imm + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) + return false; + } else { + // reg + imm * 4. + if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) + return false; + } + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of + // sext i32 to i64 when addr mode is r+i. + if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && + LD->getExtensionType() == ISD::SEXTLOAD && + isa<ConstantSDNode>(Offset)) + return false; + } + + AM = ISD::PRE_INC; + return true; +} + +//===----------------------------------------------------------------------===// +// LowerOperation implementation +//===----------------------------------------------------------------------===// + +/// GetLabelAccessInfo - Return true if we should reference labels using a +/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. +static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, + unsigned &LoOpFlags, const GlobalValue *GV = 0) { + HiOpFlags = PPCII::MO_HA16; + LoOpFlags = PPCII::MO_LO16; + + // Don't use the pic base if not in PIC relocation model. Or if we are on a + // non-darwin platform. We don't support PIC on other platforms yet. 
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_ && + TM.getSubtarget<PPCSubtarget>().isDarwin(); + if (isPIC) { + HiOpFlags |= PPCII::MO_PIC_FLAG; + LoOpFlags |= PPCII::MO_PIC_FLAG; + } + + // If this is a reference to a global value that requires a non-lazy-ptr, make + // sure that instruction lowering adds it. + if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) { + HiOpFlags |= PPCII::MO_NLP_FLAG; + LoOpFlags |= PPCII::MO_NLP_FLAG; + + if (GV->hasHiddenVisibility()) { + HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; + LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; + } + } + + return isPIC; +} + +static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, + SelectionDAG &DAG) { + EVT PtrVT = HiPart.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + DebugLoc DL = HiPart.getDebugLoc(); + + SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); + SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); + + // With PIC, the first instruction is actually "GR+hi(&G)". + if (isPIC) + Hi = DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); + + // Generate non-pic code that has direct accesses to the constant pool. + // The address of the global is just (hi(&g)+lo(&g)). 
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); +} + +SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + const Constant *C = CP->getConstVal(); + + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + SDValue CPIHi = + DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); + SDValue CPILo = + DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); + return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); +} + +SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); + SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); + return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); +} + +SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag); + SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag); + return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); +} + +SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); + DebugLoc DL = GSDN->getDebugLoc(); + const GlobalValue *GV = GSDN->getGlobal(); + + // 64-bit SVR4 ABI code is always position-independent. 
+ // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); + return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); + + SDValue GAHi = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); + SDValue GALo = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); + + SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); + + // If the global reference is actually to a non-lazy-pointer, we have to do an + // extra load to get the address of the global. + if (MOHiFlag & PPCII::MO_NLP_FLAG) + Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), + false, false, 0); + return Ptr; +} + +SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + DebugLoc dl = Op.getDebugLoc(); + + // If we're comparing for equality to zero, expose the fact that this is + // implented as a ctlz/srl pair on ppc, so that the dag combiner can + // fold the new nodes. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (C->isNullValue() && CC == ISD::SETEQ) { + EVT VT = Op.getOperand(0).getValueType(); + SDValue Zext = Op.getOperand(0); + if (VT.bitsLT(MVT::i32)) { + VT = MVT::i32; + Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); + } + unsigned Log2b = Log2_32(VT.getSizeInBits()); + SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); + SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, + DAG.getConstant(Log2b, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); + } + // Leave comparisons against 0 and -1 alone for now, since they're usually + // optimized. 
FIXME: revisit this when we can custom lower all setcc + // optimizations. + if (C->isAllOnesValue() || C->isNullValue()) + return SDValue(); + } + + // If we have an integer seteq/setne, turn it into a compare against zero + // by xor'ing the rhs with the lhs, which is faster than setting a + // condition register, reading it back out, and masking the correct bit. The + // normal approach here uses sub to do this instead of xor. Using xor exposes + // the result to other bit-twiddling opportunities. + EVT LHSVT = Op.getOperand(0).getValueType(); + if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + EVT VT = Op.getValueType(); + SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), + Op.getOperand(1)); + return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); + } + return SDValue(); +} + +SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + DebugLoc dl = Node->getDebugLoc(); + + assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); + + // gpr_index + SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + VAListPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = GprIndex.getValue(1); + + if (VT == MVT::i64) { + // Check if GprIndex is even + SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, + DAG.getConstant(0, MVT::i32), ISD::SETNE); + SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + // Align GprIndex to be even if it isn't + GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, + 
GprIndex); + } + + // fpr index is 1 byte after gpr + SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(1, MVT::i32)); + + // fpr + SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + FprPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = FprIndex.getValue(1); + + SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(8, MVT::i32)); + + SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(4, MVT::i32)); + + // areas + SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = OverflowArea.getValue(1); + + SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = RegSaveArea.getValue(1); + + // select overflow_area if index > 8 + SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(8, MVT::i32), ISD::SETLT); + + // adjustment constant gpr_index * 4/8 + SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + // OurReg = RegSaveArea + RegConstant + SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, + RegConstant); + + // Floating types are 32 bytes into RegSaveArea + if (VT.isFloatingPoint()) + OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, + DAG.getConstant(32, MVT::i32)); + + // increase {f,g}pr_index by 1 (or 2 if VT is i64) + SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT == MVT::i64 ? 2 : 1, + MVT::i32)); + + InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, + VT.isInteger() ? 
VAListPtr : FprPtr, + MachinePointerInfo(SV), + MVT::i8, false, false, 0); + + // determine if we should load from reg_save_area or overflow_area + SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); + + // increase overflow_area by 4/8 if gpr/fpr > 8 + SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, + OverflowAreaPlusN); + + InChain = DAG.getTruncStore(InChain, dl, OverflowArea, + OverflowAreaPtr, + MachinePointerInfo(), + MVT::i32, false, false, 0); + + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); +} + +SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Trmp = Op.getOperand(1); // trampoline + SDValue FPtr = Op.getOperand(2); // nested function + SDValue Nest = Op.getOperand(3); // 'nest' parameter value + DebugLoc dl = Op.getDebugLoc(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + bool isPPC64 = (PtrVT == MVT::i64); + Type *IntPtrTy = + DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType( + *DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + Entry.Ty = IntPtrTy; + Entry.Node = Trmp; Args.push_back(Entry); + + // TrampSize == (isPPC64 ? 48 : 40); + Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, + isPPC64 ? 
MVT::i64 : MVT::i32); + Args.push_back(Entry); + + Entry.Node = FPtr; Args.push_back(Entry); + Entry.Node = Nest; Args.push_back(Entry); + + // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) + std::pair<SDValue, SDValue> CallResult = + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__trampoline_setup", PtrVT), + Args, DAG, dl); + + return CallResult.second; +} + +SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + DebugLoc dl = Op.getDebugLoc(); + + if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), + MachinePointerInfo(SV), + false, false, 0); + } + + // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. + // We suppose the given va_list is already allocated. + // + // typedef struct { + // char gpr; /* index into the array of 8 GPRs + // * stored in the register save area + // * gpr=0 corresponds to r3, + // * gpr=1 to r4, etc. + // */ + // char fpr; /* index into the array of 8 FPRs + // * stored in the register save area + // * fpr=0 corresponds to f1, + // * fpr=1 to f2, etc. 
+ // */ + // char *overflow_arg_area; + // /* location on stack that holds + // * the next overflow argument + // */ + // char *reg_save_area; + // /* where r3:r10 and f1:f8 (if saved) + // * are stored + // */ + // } va_list[1]; + + + SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); + SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); + + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), + PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + PtrVT); + + uint64_t FrameOffset = PtrVT.getSizeInBits()/8; + SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); + + uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; + SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); + + uint64_t FPROffset = 1; + SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); + + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + + // Store first byte : number of int regs + SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, + Op.getOperand(1), + MachinePointerInfo(SV), + MVT::i8, false, false, 0); + uint64_t nextOffset = FPROffset; + SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), + ConstFPROffset); + + // Store second byte : number of float regs + SDValue secondStore = + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, + MachinePointerInfo(SV, nextOffset), MVT::i8, + false, false, 0); + nextOffset += StackOffset; + nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); + + // Store second word : arguments given on stack + SDValue thirdStore = + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, + MachinePointerInfo(SV, nextOffset), + false, false, 0); + nextOffset += FrameOffset; + nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); + + // Store third word : arguments given in registers + return 
DAG.getStore(thirdStore, dl, FR, nextPtr, + MachinePointerInfo(SV, nextOffset), + false, false, 0); + +} + +#include "PPCGenCallingConv.inc" + +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return true; +} + +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // Skip one register if the first unallocated register has an even register + // number and there are still argument registers available which have not been + // allocated yet. RegNum is actually an index into ArgRegs, which means we + // need to skip a register if RegNum is odd. + if (RegNum != NumArgRegs && RegNum % 2 == 1) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the first + // unallocated register has an odd register number and does not actually + // allocate a register for the current argument. + return false; +} + +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // If there is only one Floating-point register left we need to put both f64 + // values of a split ppc_fp128 value on the stack. 
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the two f64 + // values a ppc_fp128 value is split into are both passed in registers or both + // passed on the stack and does not actually allocate a register for the + // current argument. + return false; +} + +/// GetFPR - Get the set of FP registers that should be allocated for arguments, +/// on Darwin. +static const unsigned *GetFPR() { + static const unsigned FPR[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 + }; + + return FPR; +} + +/// CalculateStackSlotSize - Calculates the size reserved for this argument on +/// the stack. +static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, + unsigned PtrByteSize) { + unsigned ArgSize = ArgVT.getSizeInBits()/8; + if (Flags.isByVal()) + ArgSize = Flags.getByValSize(); + ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + + return ArgSize; +} + +SDValue +PPCTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) + const { + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { + return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); + } else { + return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); + } +} + +SDValue +PPCTargetLowering::LowerFormalArguments_SVR4( + SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + // 32-bit SVR4 ABI Stack Frame Layout: + // +-----------------------------------+ + // +--> | Back chain | + // | +-----------------------------------+ + // | | 
Floating-point register save area | + // | +-----------------------------------+ + // | | General register save area | + // | +-----------------------------------+ + // | | CR save word | + // | +-----------------------------------+ + // | | VRSAVE save word | + // | +-----------------------------------+ + // | | Alignment padding | + // | +-----------------------------------+ + // | | Vector register save area | + // | +-----------------------------------+ + // | | Local variable space | + // | +-----------------------------------+ + // | | Parameter list area | + // | +-----------------------------------+ + // | | LR save word | + // | +-----------------------------------+ + // SP--> +--- | Back chain | + // +-----------------------------------+ + // + // Specifications: + // System V Application Binary Interface PowerPC Processor Supplement + // AltiVec Technology Programming Interface Manual + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Potential tail calls could cause overwriting of argument stack slots. + bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + unsigned PtrByteSize = 4; + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + // Reserve space for the linkage area on the stack. + CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); + + CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. 
+ if (VA.isRegLoc()) { + TargetRegisterClass *RC; + EVT ValVT = VA.getValVT(); + + switch (ValVT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("ValVT not supported by formal arguments Lowering"); + case MVT::i32: + RC = PPC::GPRCRegisterClass; + break; + case MVT::f32: + RC = PPC::F4RCRegisterClass; + break; + case MVT::f64: + RC = PPC::F8RCRegisterClass; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + RC = PPC::VRRCRegisterClass; + break; + } + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); + + InVals.push_back(ArgValue); + } else { + // Argument stored in memory. + assert(VA.isMemLoc()); + + unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), + isImmutable); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo(), + false, false, 0)); + } + } + + // Assign locations to all of the incoming aggregate by value arguments. + // Aggregates passed by value are stored in the local variable space of the + // caller's stack frame, right above the parameter list area. + SmallVector<CCValAssign, 16> ByValArgLocs; + CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ByValArgLocs, *DAG.getContext()); + + // Reserve stack space for the allocations in CCInfo. + CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); + + CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); + + // Set the size that is at least reserved in caller of this function. 
Tail + // call optimized function's reserved stack space needs to be aligned so that + // taking the difference between two stack areas will result in an aligned + // stack. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + + MinReservedArea = + std::max(MinReservedArea, + PPCFrameLowering::getMinCallFrameSize(false, false)); + + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + + FI->setMinReservedArea(MinReservedArea); + + SmallVector<SDValue, 8> MemOps; + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + static const unsigned GPArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); + + static const unsigned FPArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); + + FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, + NumGPArgRegs)); + FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, + NumFPArgRegs)); + + // Make room for NumGPArgRegs and NumFPArgRegs. + int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + + NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; + + FuncInfo->setVarArgsStackOffset( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + CCInfo.getNextStackOffset(), true)); + + FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + + // The fixed integer arguments of a variadic function are stored to the + // VarArgsFrameIndex on the stack so that they may be loaded by deferencing + // the result of va_next. 
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { + // Get an existing live-in vreg, or add a new one. + unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); + if (!VReg) + VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 + // is set. + // The double arguments are stored to the VarArgsFrameIndex + // on the stack. + for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { + // Get an existing live-in vreg, or add a new one. + unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); + if (!VReg) + VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); + MemOps.push_back(Store); + // Increment the address by eight for the next argument to store + SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, + PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); + + return Chain; +} + +SDValue +PPCTargetLowering::LowerFormalArguments_Darwin( + SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // TODO: add description of PPC stack frame format, or at least some docs. 
+ // + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + bool isPPC64 = PtrVT == MVT::i64; + // Potential tail calls could cause overwriting of argument stack slots. + bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + unsigned PtrByteSize = isPPC64 ? 8 : 4; + + unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); + // Area that is at least reserved in caller of this function. + unsigned MinReservedArea = ArgOffset; + + static const unsigned GPR_32[] = { // 32-bit registers. + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + static const unsigned GPR_64[] = { // 64-bit registers. + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + + static const unsigned *FPR = GetFPR(); + + static const unsigned VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + + const unsigned Num_GPR_Regs = array_lengthof(GPR_32); + const unsigned Num_FPR_Regs = 13; + const unsigned Num_VR_Regs = array_lengthof( VR); + + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + + const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + + // In 32-bit non-varargs functions, the stack space for vectors is after the + // stack space for non-vectors. We do not use this space unless we have + // too many vectors to fit in registers, something that only occurs in + // constructed examples:), but we have to walk the arglist to figure + // that out...for the pathological case, compute VecArgOffset as the + // start of the vector parameter area. Computing VecArgOffset is the + // entire point of the following loop. 
+ unsigned VecArgOffset = ArgOffset; + if (!isVarArg && !isPPC64) { + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; + ++ArgNo) { + EVT ObjectVT = Ins[ArgNo].VT; + unsigned ObjSize = ObjectVT.getSizeInBits()/8; + ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + + if (Flags.isByVal()) { + // ObjSize is the true size, ArgSize rounded up to multiple of regs. + ObjSize = Flags.getByValSize(); + unsigned ArgSize = + ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + VecArgOffset += ArgSize; + continue; + } + + switch(ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); + case MVT::i32: + case MVT::f32: + VecArgOffset += isPPC64 ? 8 : 4; + break; + case MVT::i64: // PPC64 + case MVT::f64: + VecArgOffset += 8; + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + // Nothing to do, we're only looking at Nonvector args here. + break; + } + } + } + // We've found where the vector parameter area in memory is. Skip the + // first 12 parameters; these don't use that memory. + VecArgOffset = ((VecArgOffset+15)/16)*16; + VecArgOffset += 12*16; + + // Add DAG nodes to load the arguments or copy them out of registers. On + // entry to a function on PPC, the arguments start after the linkage area, + // although the first ones are often in registers. + + SmallVector<SDValue, 8> MemOps; + unsigned nAltivecParamsAtEnd = 0; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + SDValue ArgVal; + bool needsLoad = false; + EVT ObjectVT = Ins[ArgNo].VT; + unsigned ObjSize = ObjectVT.getSizeInBits()/8; + unsigned ArgSize = ObjSize; + ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + + unsigned CurArgOffset = ArgOffset; + + // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || + ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { + if (isVarArg || isPPC64) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += CalculateStackSlotSize(ObjectVT, + Flags, + PtrByteSize); + } else nAltivecParamsAtEnd++; + } else + // Calculate min reserved area. + MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, + Flags, + PtrByteSize); + + // FIXME the codegen can be much improved in some cases. + // We do not have to keep everything in memory. + if (Flags.isByVal()) { + // ObjSize is the true size, ArgSize rounded up to multiple of registers. + ObjSize = Flags.getByValSize(); + ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + // Objects of size 1 and 2 are right justified, everything else is + // left justified. This means the memory address is adjusted forwards. + if (ObjSize==1 || ObjSize==2) { + CurArgOffset = CurArgOffset + (4 - ObjSize); + } + // The value of the object is its address. + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + InVals.push_back(FIN); + if (ObjSize==1 || ObjSize==2) { + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg; + if (isPPC64) + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + else + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), + ObjSize==1 ? MVT::i8 : MVT::i16, + false, false, 0); + MemOps.push_back(Store); + ++GPR_idx; + } + + ArgOffset += PtrByteSize; + + continue; + } + for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { + // Store whatever pieces of the object are in registers + // to memory. ArgVal will be address of the beginning of + // the object. 
+ if (GPR_idx != Num_GPR_Regs) { + unsigned VReg; + if (isPPC64) + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + else + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), + false, false, 0); + MemOps.push_back(Store); + ++GPR_idx; + ArgOffset += PtrByteSize; + } else { + ArgOffset += ArgSize - (ArgOffset-CurArgOffset); + break; + } + } + continue; + } + + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); + case MVT::i32: + if (!isPPC64) { + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + ++GPR_idx; + } else { + needsLoad = true; + ArgSize = PtrByteSize; + } + // All int arguments reserve stack space in the Darwin ABI. + ArgOffset += PtrByteSize; + break; + } + // FALLTHROUGH + case MVT::i64: // PPC64 + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::i32) { + // PPC64 passes i8, i16, and i32 values in i64 registers. Promote + // value to MVT::i64 and then truncate to the correct register size. + if (Flags.isSExt()) + ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, + DAG.getValueType(ObjectVT)); + else if (Flags.isZExt()) + ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, + DAG.getValueType(ObjectVT)); + + ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); + } + + ++GPR_idx; + } else { + needsLoad = true; + ArgSize = PtrByteSize; + } + // All int arguments reserve stack space in the Darwin ABI. 
+ ArgOffset += 8; + break; + + case MVT::f32: + case MVT::f64: + // Every 4 bytes of argument space consumes one of the GPRs available for + // argument passing. + if (GPR_idx != Num_GPR_Regs) { + ++GPR_idx; + if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) + ++GPR_idx; + } + if (FPR_idx != Num_FPR_Regs) { + unsigned VReg; + + if (ObjectVT == MVT::f32) + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); + else + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + ++FPR_idx; + } else { + needsLoad = true; + } + + // All FP arguments reserve stack space in the Darwin ABI. + ArgOffset += isPPC64 ? 8 : ObjSize; + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + // Note that vector arguments in registers don't reserve stack space, + // except in varargs functions. + if (VR_idx != Num_VR_Regs) { + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + if (isVarArg) { + while ((ArgOffset % 16) != 0) { + ArgOffset += PtrByteSize; + if (GPR_idx != Num_GPR_Regs) + GPR_idx++; + } + ArgOffset += 16; + GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? + } + ++VR_idx; + } else { + if (!isVarArg && !isPPC64) { + // Vectors go after all the nonvectors. + CurArgOffset = VecArgOffset; + VecArgOffset += 16; + } else { + // Vectors are aligned. + ArgOffset = ((ArgOffset+15)/16)*16; + CurArgOffset = ArgOffset; + ArgOffset += 16; + } + needsLoad = true; + } + break; + } + + // We need to load the argument to a virtual register if we determined above + // that we ran out of physical registers of the appropriate type. 
+ if (needsLoad) { + int FI = MFI->CreateFixedObject(ObjSize, + CurArgOffset + (ArgSize - ObjSize), + isImmutable); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), + false, false, 0); + } + + InVals.push_back(ArgVal); + } + + // Set the size that is at least reserved in caller of this function. Tail + // call optimized function's reserved stack space needs to be aligned so that + // taking the difference between two stack areas will result in an aligned + // stack. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + // Add the Altivec parameters at the end, if needed. + if (nAltivecParamsAtEnd) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += 16*nAltivecParamsAtEnd; + } + MinReservedArea = + std::max(MinReservedArea, + PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + FI->setMinReservedArea(MinReservedArea); + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + int Depth = ArgOffset; + + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + Depth, true)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing the + // result of va_next. 
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { + unsigned VReg; + + if (isPPC64) + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + else + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + } + + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); + + return Chain; +} + +/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus +/// linkage area for the Darwin ABI. +static unsigned +CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, + bool isPPC64, + bool isVarArg, + unsigned CC, + const SmallVectorImpl<ISD::OutputArg> + &Outs, + const SmallVectorImpl<SDValue> &OutVals, + unsigned &nAltivecParamsAtEnd) { + // Count how many bytes are to be pushed on the stack, including the linkage + // area, and parameter passing area. We start with 24/48 bytes, which is + // prereserved space for [SP][CR][LR][3 x unused]. + unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned NumOps = Outs.size(); + unsigned PtrByteSize = isPPC64 ? 8 : 4; + + // Add up all the space actually used. + // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually + // they all go in registers, but we must reserve stack space for them for + // possible use by the caller. In varargs or 64-bit calls, parameters are + // assigned stack space in order, with padding so Altivec parameters are + // 16-byte aligned. 
+ nAltivecParamsAtEnd = 0; + for (unsigned i = 0; i != NumOps; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Outs[i].VT; + // Varargs Altivec parameters are padded to a 16 byte boundary. + if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || + ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { + if (!isVarArg && !isPPC64) { + // Non-varargs Altivec parameters go after all the non-Altivec + // parameters; handle those later so we know how much padding we need. + nAltivecParamsAtEnd++; + continue; + } + // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. + NumBytes = ((NumBytes+15)/16)*16; + } + NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + } + + // Allow for Altivec parameters at the end, if needed. + if (nAltivecParamsAtEnd) { + NumBytes = ((NumBytes+15)/16)*16; + NumBytes += 16*nAltivecParamsAtEnd; + } + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + NumBytes = std::max(NumBytes, + PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); + + // Tail call needs the stack to be aligned. + if (CC==CallingConv::Fast && GuaranteedTailCallOpt) { + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + NumBytes = (NumBytes + AlignMask) & ~AlignMask; + } + + return NumBytes; +} + +/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be +/// adjusted to accommodate the arguments for the tailcall. 
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, + unsigned ParamSize) { + + if (!isTailCall) return 0; + + PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); + unsigned CallerMinReservedArea = FI->getMinReservedArea(); + int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; + // Remember only if the new adjustement is bigger. + if (SPDiff < FI->getTailCallSPDelta()) + FI->setTailCallSPDelta(SPDiff); + + return SPDiff; +} + +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. Targets which want to do tail call +/// optimization should implement this function. +bool +PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + if (!GuaranteedTailCallOpt) + return false; + + // Variable argument functions are not supported. + if (isVarArg) + return false; + + MachineFunction &MF = DAG.getMachineFunction(); + CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); + if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { + // Functions containing by val parameters are not supported. + for (unsigned i = 0; i != Ins.size(); i++) { + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Flags.isByVal()) return false; + } + + // Non PIC/GOT tail calls are supported. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return true; + + // At the moment we can only do local tail calls (in same module, hidden + // or protected) if we are generating PIC. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + return G->getGlobal()->hasHiddenVisibility() + || G->getGlobal()->hasProtectedVisibility(); + } + + return false; +} + +/// isCallCompatibleAddress - Return the immediate to use if the specified +/// 32-bit value is representable in the immediate field of a BxA instruction. 
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) return 0; + + int Addr = C->getZExtValue(); + if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. + (Addr << 6 >> 6) != Addr) + return 0; // Top 6 bits have to be sext of immediate. + + return DAG.getConstant((int)C->getZExtValue() >> 2, + DAG.getTargetLoweringInfo().getPointerTy()).getNode(); +} + +namespace { + +struct TailCallArgumentInfo { + SDValue Arg; + SDValue FrameIdxOp; + int FrameIdx; + + TailCallArgumentInfo() : FrameIdx(0) {} +}; + +} + +/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. +static void +StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, + SDValue Chain, + const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs, + SmallVector<SDValue, 8> &MemOpChains, + DebugLoc dl) { + for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { + SDValue Arg = TailCallArgs[i].Arg; + SDValue FIN = TailCallArgs[i].FrameIdxOp; + int FI = TailCallArgs[i].FrameIdx; + // Store relative to framepointer. + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } +} + +/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to +/// the appropriate stack slot for the tail call optimized function call. +static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, + MachineFunction &MF, + SDValue Chain, + SDValue OldRetAddr, + SDValue OldFP, + int SPDiff, + bool isPPC64, + bool isDarwinABI, + DebugLoc dl) { + if (SPDiff) { + // Calculate the new stack slot for the return address. + int SlotSize = isPPC64 ? 8 : 4; + int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, + isDarwinABI); + int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, + NewRetAddrLoc, true); + EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; + SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); + Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, + MachinePointerInfo::getFixedStack(NewRetAddr), + false, false, 0); + + // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack + // slot as the FP is never overwritten. + if (isDarwinABI) { + int NewFPLoc = + SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); + int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, + true); + SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); + Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, + MachinePointerInfo::getFixedStack(NewFPIdx), + false, false, 0); + } + } + return Chain; +} + +/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate +/// the position of the argument. +static void +CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, + SDValue Arg, int SPDiff, unsigned ArgOffset, + SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { + int Offset = ArgOffset + SPDiff; + uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + EVT VT = isPPC64 ? MVT::i64 : MVT::i32; + SDValue FIN = DAG.getFrameIndex(FI, VT); + TailCallArgumentInfo Info; + Info.Arg = Arg; + Info.FrameIdxOp = FIN; + Info.FrameIdx = FI; + TailCallArguments.push_back(Info); +} + +/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address +/// stack slot. Returns the chain as result and the loaded frame pointers in +/// LROpOut/FPOpout. Used when tail calling. +SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, + int SPDiff, + SDValue Chain, + SDValue &LROpOut, + SDValue &FPOpOut, + bool isDarwinABI, + DebugLoc dl) const { + if (SPDiff) { + // Load the LR and FP stack slot for later adjusting. + EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; + LROpOut = getReturnAddrFrameIndex(DAG); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), + false, false, 0); + Chain = SDValue(LROpOut.getNode(), 1); + + // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack + // slot as the FP is never overwritten. + if (isDarwinABI) { + FPOpOut = getFramePointerFrameIndex(DAG); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), + false, false, 0); + Chain = SDValue(FPOpOut.getNode(), 1); + } + } + return Chain; +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. +/// Sometimes what we are copying is the end of a larger object, the part that +/// does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + false, false, MachinePointerInfo(0), + MachinePointerInfo(0)); +} + +/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of +/// tail calls. 
+static void +LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, + SDValue Arg, SDValue PtrOff, int SPDiff, + unsigned ArgOffset, bool isPPC64, bool isTailCall, + bool isVector, SmallVector<SDValue, 8> &MemOpChains, + SmallVector<TailCallArgumentInfo, 8> &TailCallArguments, + DebugLoc dl) { + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + if (!isTailCall) { + if (isVector) { + SDValue StackPtr; + if (isPPC64) + StackPtr = DAG.getRegister(PPC::X1, MVT::i64); + else + StackPtr = DAG.getRegister(PPC::R1, MVT::i32); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, + DAG.getConstant(ArgOffset, PtrVT)); + } + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0)); + // Calculate and remember argument location. + } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, + TailCallArguments); +} + +static +void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, + DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, + SDValue LROp, SDValue FPOp, bool isDarwinABI, + SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { + MachineFunction &MF = DAG.getMachineFunction(); + + // Emit a sequence of copyto/copyfrom virtual registers for arguments that + // might overwrite each other in case of tail call optimization. + SmallVector<SDValue, 8> MemOpChains2; + // Do not flag preceding copytoreg stuff together with the following stuff. + InFlag = SDValue(); + StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, + MemOpChains2, dl); + if (!MemOpChains2.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains2[0], MemOpChains2.size()); + + // Store the return address to the appropriate stack slot. + Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, + isPPC64, isDarwinABI, dl); + + // Emit callseq_end just before tailcall node. 
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); +} + +static +unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, + SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys, + const PPCSubtarget &PPCSubTarget) { + + bool isPPC64 = PPCSubTarget.isPPC64(); + bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. + + unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; + + bool needIndirectCall = true; + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { + // If this is an absolute destination address, use the munged value. + Callee = SDValue(Dest, 0); + needIndirectCall = false; + } + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 + // Use indirect calls for ALL functions calls in JIT mode, since the + // far-call stubs may be outside relocation limits for a BL instruction. + if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { + unsigned OpFlags = 0; + if (DAG.getTarget().getRelocationModel() != Reloc::Static && + (PPCSubTarget.getTargetTriple().isMacOSX() && + PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && + (G->getGlobal()->isDeclaration() || + G->getGlobal()->isWeakForLinker())) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. 
+ OpFlags = PPCII::MO_DARWIN_STUB; + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress / + // TargetExternalSymbol node so that legalize doesn't hack it. + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType(), + 0, OpFlags); + needIndirectCall = false; + } + } + + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + unsigned char OpFlags = 0; + + if (DAG.getTarget().getRelocationModel() != Reloc::Static && + (PPCSubTarget.getTargetTriple().isMacOSX() && + PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = PPCII::MO_DARWIN_STUB; + } + + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), + OpFlags); + needIndirectCall = false; + } + + if (needIndirectCall) { + // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair + // to do the call, we can't use PPCISD::CALL. + SDValue MTCTROps[] = {Chain, Callee, InFlag}; + + if (isSVR4ABI && isPPC64) { + // Function pointers in the 64-bit SVR4 ABI do not point to the function + // entry point, but to the function descriptor (the function entry point + // address is part of the function descriptor though). + // The function descriptor is a three doubleword structure with the + // following fields: function entry point, TOC base address and + // environment pointer. + // Thus for a call through a function pointer, the following actions need + // to be performed: + // 1. Save the TOC of the caller in the TOC save area of its stack + // frame (this is done in LowerCall_Darwin()). + // 2. Load the address of the function entry point from the function + // descriptor. + // 3. Load the TOC of the callee from the function descriptor into r2. + // 4. 
Load the environment pointer from the function descriptor into + // r11. + // 5. Branch to the function entry point address. + // 6. On return of the callee, the TOC of the caller needs to be + // restored (this is done in FinishCall()). + // + // All those operations are flagged together to ensure that no other + // operations can be scheduled in between. E.g. without flagging the + // operations together, a TOC access in the caller could be scheduled + // between the load of the callee TOC and the branch to the callee, which + // results in the TOC access going through the TOC of the callee instead + // of going through the TOC of the caller, which leads to incorrect code. + + // Load the address of the function entry point from the function + // descriptor. + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); + SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, + InFlag.getNode() ? 3 : 2); + Chain = LoadFuncPtr.getValue(1); + InFlag = LoadFuncPtr.getValue(2); + + // Load environment pointer into r11. + // Offset of the environment pointer within the function descriptor. + SDValue PtrOff = DAG.getIntPtrConstant(16); + + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); + SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, + InFlag); + Chain = LoadEnvPtr.getValue(1); + InFlag = LoadEnvPtr.getValue(2); + + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, + InFlag); + Chain = EnvVal.getValue(0); + InFlag = EnvVal.getValue(1); + + // Load TOC of the callee into r2. We are using a target-specific load + // with r2 hard coded, because the result of a target-independent load + // would never go directly into r2, since r2 is a reserved register (which + // prevents the register allocator from allocating it), resulting in an + // additional register being allocated and an unnecessary move instruction + // being generated. 
+ VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, + Callee, InFlag); + Chain = LoadTOCPtr.getValue(0); + InFlag = LoadTOCPtr.getValue(1); + + MTCTROps[0] = Chain; + MTCTROps[1] = LoadFuncPtr; + MTCTROps[2] = InFlag; + } + + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, + 2 + (InFlag.getNode() != 0)); + InFlag = Chain.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::Other); + NodeTys.push_back(MVT::Glue); + Ops.push_back(Chain); + CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; + Callee.setNode(0); + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); + } + + // If this is a direct call, pass the chain and the callee. + if (Callee.getNode()) { + Ops.push_back(Chain); + Ops.push_back(Callee); + } + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + return CallOpc; +} + +SDValue +PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + SmallVector<CCValAssign, 16> RVLocs; + CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); + + // Copy all of the result registers out of their specified physreg. 
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + EVT VT = VA.getValVT(); + assert(VA.isRegLoc() && "Can only return in registers!"); + Chain = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VT, InFlag).getValue(1); + InVals.push_back(Chain.getValue(0)); + InFlag = Chain.getValue(2); + } + + return Chain; +} + +SDValue +PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, + bool isTailCall, bool isVarArg, + SelectionDAG &DAG, + SmallVector<std::pair<unsigned, SDValue>, 8> + &RegsToPass, + SDValue InFlag, SDValue Chain, + SDValue &Callee, + int SPDiff, unsigned NumBytes, + const SmallVectorImpl<ISD::InputArg> &Ins, + SmallVectorImpl<SDValue> &InVals) const { + std::vector<EVT> NodeTys; + SmallVector<SDValue, 8> Ops; + unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, + isTailCall, RegsToPass, Ops, NodeTys, + PPCSubTarget); + + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCRegisterInfo::eliminateCallFramePseudoInstr. + int BytesCalleePops = + (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0; + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + // Emit tail call. + if (isTailCall) { + // If this is the first return lowered for this function, add the regs + // to the liveout set for the function. 
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); + for (unsigned i = 0; i != RVLocs.size(); ++i) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + assert(((Callee.getOpcode() == ISD::Register && + cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || + Callee.getOpcode() == ISD::TargetExternalSymbol || + Callee.getOpcode() == ISD::TargetGlobalAddress || + isa<ConstantSDNode>(Callee)) && + "Expecting an global address, external symbol, absolute value or register"); + + return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); + } + + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Add a NOP immediately after the branch instruction when using the 64-bit + // SVR4 ABI. At link time, if caller and callee are in a different module and + // thus have a different TOC, the call will be replaced with a call to a stub + // function which saves the current TOC, loads the TOC of the callee and + // branches to the callee. The NOP will be replaced with a load instruction + // which restores the TOC of the caller from the TOC save slot of the current + // stack frame. If caller and callee belong to the same module (and have the + // same TOC), the NOP will remain unchanged. + if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + if (CallOpc == PPCISD::BCTRL_SVR4) { + // This is a call through a function pointer. + // Restore the caller TOC from the save area into R2. + // See PrepareCall() for more information about calls through function + // pointers in the 64-bit SVR4 ABI. 
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
      InFlag = Chain.getValue(1);
    } else {
      // Otherwise insert NOP.
      InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
    }
  }

  // Close the call sequence, recording both the outgoing-argument byte count
  // and the number of bytes the callee pops.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(BytesCalleePops, true),
                             InFlag);
  // The CALLSEQ_END glue is only picked up when there are results to copy.
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

/// LowerCall - Entry point for call lowering.  Re-checks tail-call
/// eligibility (clearing isTailCall for the caller when the call does not
/// qualify) and dispatches on the ABI: 32-bit SVR4 goes to LowerCall_SVR4,
/// everything else -- including 64-bit SVR4 -- goes to LowerCall_Darwin.
SDValue
PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  if (isTailCall)
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                   Ins, DAG);

  if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
    return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
                          isTailCall, Outs, OutVals, Ins,
                          dl, DAG, InVals);

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, Outs, OutVals, Ins,
                          dl, DAG, InVals);
}

/// LowerCall_SVR4 - Lower an outgoing call for the 32-bit SVR4 ABI: assign
/// argument locations, copy by-value aggregates into the caller's frame, emit
/// the register copies / stack stores, and hand off to FinishCall.
SDValue
PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  bool isTailCall,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  DebugLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) const {
  // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    // (LowerCall_SVR4, vararg branch.)  Each operand is classified on its own
    // so that fixed and variadic operands can use different assignment
    // functions.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                             CCInfo);
      } else {
        Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                    ArgFlags, CCInfo);
      }

      // A true result means the operand could not be assigned a location.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(0);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  // (LowerCall_SVR4, continued.)  LROp / FPOp are filled in by the helper and
  // consumed later by PrepareTailCall.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i indexes ArgLocs / OutVals / Outs; j indexes ByValArgLocs and only
  // advances when a by-value argument is seen.
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.  The copy is chained on the input chain of the current
      // CALLSEQ_START (its operand 0), i.e. before the call sequence begins.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      // (LowerCall_SVR4, by-value argument handling.)  Build a replacement
      // CALLSEQ_START chained after the memcpy and redirect every user of the
      // old one to it.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                           CallSeqStart.getNode()->getOperand(1));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  // Merge all argument stores into a single chain.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Set CR6 to true if this is a vararg call with floating args passed in
  // registers.
  // NOTE(review): the register actually written is PPC::CR1EQ; "CR6" above
  // presumably refers to CR bit 6 -- confirm.
  if (isVarArg) {
    SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
                                     dl, MVT::i32), 0);
    RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  // (End of LowerCall_SVR4.)  InFlag starts out null; each copy is glued to
  // the previous one through value #1 of the copy node.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
                    false, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}

/// LowerCall_Darwin - Lower an outgoing call for every ABI that LowerCall does
/// not route to LowerCall_SVR4.  The pointer width (and therefore 32- vs
/// 64-bit behaviour) is taken from the target's pointer type.
SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  unsigned nAltivecParamsAtEnd = 0;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  // (LowerCall_Darwin, continued.)  nAltivecParamsAtEnd is passed to the
  // helper and read again after the argument loop.
  unsigned NumBytes =
    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
                                         Outs, OutVals,
                                         nAltivecParamsAtEnd);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + static const unsigned *FPR = GetFPR(); + + static const unsigned VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + const unsigned NumGPRs = array_lengthof(GPR_32); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); + + const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<TailCallArgumentInfo, 8> TailCallArguments; + + SmallVector<SDValue, 8> MemOpChains; + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // PtrOff will be used to store the current argument to the stack if a + // register cannot be found for it. + SDValue PtrOff; + + PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + + // On PPC64, promote integers to 64-bit values. + if (isPPC64 && Arg.getValueType() == MVT::i32) { + // FIXME: Should this use ANY_EXTEND if neither sext nor zext? + unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); + } + + // FIXME memcpy is used way more than necessary. Correctness first. + if (Flags.isByVal()) { + unsigned Size = Flags.getByValSize(); + if (Size==1 || Size==2) { + // Very small objects are passed right-justified. + // Everything else is passed left-justified. + EVT VT = (Size==1) ? 
MVT::i8 : MVT::i16; + if (GPR_idx != NumGPRs) { + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, + MachinePointerInfo(), VT, + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + + ArgOffset += PtrByteSize; + } else { + SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, dl); + // This must go outside the CALLSEQ_START..END. + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, + CallSeqStart.getNode()->getOperand(1)); + DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), + NewCallSeqStart.getNode()); + Chain = CallSeqStart = NewCallSeqStart; + ArgOffset += PtrByteSize; + } + continue; + } + // Copy entire object into memory. There are cases where gcc-generated + // code assumes it is there, even if it could be put entirely into + // registers. (This is not what the doc says.) + SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, dl); + // This must go outside the CALLSEQ_START..END. + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, + CallSeqStart.getNode()->getOperand(1)); + DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); + Chain = CallSeqStart = NewCallSeqStart; + // And copy the pieces of it that fit into registers. 
      // (LowerCall_Darwin, large by-value aggregate.)  Load the aggregate one
      // pointer-sized piece at a time into the remaining GPRs.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: skip the rest of the aggregate, rounded up to a
          // whole number of pointer-sized slots.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      // Stack space is accounted for whether or not a register was used.
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(), false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          // In 32-bit mode an f64 spans two GPRs; load the second word too.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(),
                                       false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, + DAG.getConstant(ArgOffset, PtrVT)); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0); + MemOpChains.push_back(Store); + if (VR_idx != NumVRs) { + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, + MachinePointerInfo(), + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); + } + ArgOffset += 16; + for (unsigned i=0; i<16; i+=PtrByteSize) { + if (GPR_idx == NumGPRs) + break; + SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, + DAG.getConstant(i, PtrVT)); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + } + break; + } + + // Non-varargs Altivec params generally go in registers, but have + // stack space allocated at the end. + if (VR_idx != NumVRs) { + // Doesn't have GPR space allocated. + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); + } else if (nAltivecParamsAtEnd==0) { + // We are emitting Altivec params in order. + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + isPPC64, isTailCall, true, MemOpChains, + TailCallArguments, dl); + ArgOffset += 16; + } + break; + } + } + // If all Altivec parameters fit in registers, as they usually do, + // they get stack space following the non-Altivec parameters. We + // don't track this here because nobody below needs it. + // If there are more Altivec parameters than fit in registers emit + // the stores here. + if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { + unsigned j = 0; + // Offset is aligned; skip 1st 12 params which go in V registers. 
+ ArgOffset = ((ArgOffset+15)/16)*16; + ArgOffset += 12*16; + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Arg = OutVals[i]; + EVT ArgType = Outs[i].VT; + if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || + ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { + if (++j > NumVRs) { + SDValue PtrOff; + // We are emitting Altivec params in order. + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + isPPC64, isTailCall, true, MemOpChains, + TailCallArguments, dl); + ArgOffset += 16; + } + } + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Check if this is an indirect call (MTCTR/BCTRL). + // See PrepareCall() for more information about calls through function + // pointers in the 64-bit SVR4 ABI. + if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() && + !dyn_cast<GlobalAddressSDNode>(Callee) && + !dyn_cast<ExternalSymbolSDNode>(Callee) && + !isBLACompatibleAddress(Callee, DAG)) { + // Load r2 into a virtual register and store it to the TOC save area. + SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); + // TOC save area offset. + SDValue PtrOff = DAG.getIntPtrConstant(40); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), + false, false, 0); + } + + // On Darwin, R12 must contain the address of an indirect callee. This does + // not mean the MTCTR instruction must use R12; it's easier to model this as + // an extra parameter, so do that. + if (!isTailCall && + !dyn_cast<GlobalAddressSDNode>(Callee) && + !dyn_cast<ExternalSymbolSDNode>(Callee) && + !isBLACompatibleAddress(Callee, DAG)) + RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : + PPC::R12), Callee)); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. 
  // (End of LowerCall_Darwin.)  InFlag starts out null; each copy is glued to
  // the previous one through value #1 of the copy node.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}

/// CanLowerReturn - Return the result of checking the given return values
/// against the RetCC_PPC return convention.
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
                 RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

/// LowerReturn - Copy the return values into the registers assigned by
/// RetCC_PPC and emit a PPCISD::RET_FLAG node.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  // (End of LowerReturn.)  Each copy is glued to the previous one so the
  // copies stay together ahead of the return node.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], Flag);
    Flag = Chain.getValue(1);
  }

  // Only pass the glue operand when at least one value was copied.
  if (Flag.getNode())
    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
}

/// LowerSTACKRESTORE - Set the stack pointer to the saved value (operand 1)
/// while carrying the word stored at the old stack pointer over to the new
/// stack pointer location.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                   const PPCSubtarget &Subtarget) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  DebugLoc dl = Op.getDebugLoc();

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
                                   MachinePointerInfo(),
                                   false, false, 0);

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                      false, false, 0);
}



/// getReturnAddrFrameIndex - Return a frame-index node for the return address
/// save slot, creating the fixed stack object on first use.
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = PPCSubTarget.isPPC64();
  bool isDarwinABI = PPCSubTarget.isDarwinABI();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  // (End of getReturnAddrFrameIndex.)  An index of 0 is treated as "not
  // created yet".
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

/// getFramePointerFrameIndex - Return a frame-index node for the frame pointer
/// save slot, creating the fixed stack object on first use.
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = PPCSubTarget.isPPC64();
  bool isDarwinABI = PPCSubTarget.isDarwinABI();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet (0 is treated as
  // "not created yet").
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
                                                           isDarwinABI);

    // Allocate the frame index for frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

/// LowerDYNAMIC_STACKALLOC - Lower a dynamic stack allocation to a
/// PPCISD::DYNALLOC node taking the chain, the negated size and the frame
/// pointer save slot.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                         SelectionDAG &DAG,
                                         const PPCSubtarget &Subtarget) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                  DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  // (End of LowerDYNAMIC_STACKALLOC.)  The node produces the allocated
  // pointer and an output chain.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.  Returns Op unchanged when the select cannot be expressed with
/// fsel.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  DebugLoc dl = Op.getDebugLoc();

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // FALL THROUGH (intentional): share the setge code with swapped values.
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
      // FALL THROUGH (intentional): share the setle code with swapped values.
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // x <= 0 is tested as -x >= 0.
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: compare by testing the sign of a difference.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  // (LowerSELECT_CC, general case.)  Each case selects on the sign of a
  // difference: LHS-RHS for lt/ge, RHS-LHS for gt/le.  The "less/greater"
  // forms swap the true and false values.
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  // Condition code not expressible with fsel; leave the node untouched.
  return Op;
}

/// LowerFP_TO_INT - Lower FP_TO_SINT / FP_TO_UINT by converting in a floating
/// point register and moving the result to an integer through a stack slot.
// FIXME: Split this code up when LegalizeDAGTypes lands.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                           DebugLoc dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getValueType().getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Signed i32 uses FCTIWZ; an unsigned i32 result uses FCTIDZ instead.
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
                                                         PPCISD::FCTIDZ,
                      dl, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  // (End of LowerFP_TO_INT.)  The converted value lives in an f64-sized
  // temporary.
  SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);

  // Emit a store to the stack slot.
  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
                               MachinePointerInfo(), false, false, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias.
  if (Op.getValueType() == MVT::i32)
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
                     false, false, 0);
}

/// LowerSINT_TO_FP - Lower a signed i32 / i64 to f32 / f64 conversion using
/// FCFID.  Returns a null SDValue for any other result type.
SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    // Reinterpret the integer bits as an f64, convert, and round if needed.
    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
                                Op.getOperand(0));

  // STD the extended value into the stack slot.
+ MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, 8, 8); + SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; + SDValue Store = + DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), + Ops, 4, MVT::i64, MMO); + // Load the value as a double. + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), + false, false, 0); + + // FCFID it and return it. + SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); + if (Op.getValueType() == MVT::f32) + FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); + return FP; +} + +SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + /* + The rounding mode is in bits 30:31 of FPSR, and has the following + settings: + 00 Round to nearest + 01 Round to 0 + 10 Round to +inf + 11 Round to -inf + + FLT_ROUNDS, on the other hand, expects the following: + -1 Undefined + 0 Round to 0 + 1 Round to nearest + 2 Round to +inf + 3 Round to -inf + + To perform the conversion, we do: + ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) + */ + + MachineFunction &MF = DAG.getMachineFunction(); + EVT VT = Op.getValueType(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + std::vector<EVT> NodeTys; + SDValue MFFSreg, InFlag; + + // Save FP Control Word to register + NodeTys.push_back(MVT::f64); // return register + NodeTys.push_back(MVT::Glue); // unused in this context + SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); + + // Save FP register to stack slot + int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, + StackSlot, MachinePointerInfo(), false, false,0); + + // Load FP Control Word from low 32 bits of stack slot. 
+ SDValue Four = DAG.getConstant(4, PtrVT); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), + false, false, 0); + + // Transform as necessary + SDValue CWD1 = + DAG.getNode(ISD::AND, dl, MVT::i32, + CWD, DAG.getConstant(3, MVT::i32)); + SDValue CWD2 = + DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getNode(ISD::AND, dl, MVT::i32, + DAG.getNode(ISD::XOR, dl, MVT::i32, + CWD, DAG.getConstant(3, MVT::i32)), + DAG.getConstant(3, MVT::i32)), + DAG.getConstant(1, MVT::i32)); + + SDValue RetVal = + DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); + + return DAG.getNode((VT.getSizeInBits() < 16 ? + ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); +} + +SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + DebugLoc dl = Op.getDebugLoc(); + assert(Op.getNumOperands() == 3 && + VT == Op.getOperand(1).getValueType() && + "Unexpected SHL!"); + + // Expand into a bunch of logical ops. Note that these ops + // depend on the PPC behavior for oversized shift amounts. 
+ SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Amt = Op.getOperand(2); + EVT AmtVT = Amt.getValueType(); + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, + DAG.getConstant(BitWidth, AmtVT), Amt); + SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, + DAG.getConstant(-BitWidth, AmtVT)); + SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); + SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); + SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); + SDValue OutOps[] = { OutLo, OutHi }; + return DAG.getMergeValues(OutOps, 2, dl); +} + +SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned BitWidth = VT.getSizeInBits(); + assert(Op.getNumOperands() == 3 && + VT == Op.getOperand(1).getValueType() && + "Unexpected SRL!"); + + // Expand into a bunch of logical ops. Note that these ops + // depend on the PPC behavior for oversized shift amounts. 
+ SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Amt = Op.getOperand(2); + EVT AmtVT = Amt.getValueType(); + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, + DAG.getConstant(BitWidth, AmtVT), Amt); + SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, + DAG.getConstant(-BitWidth, AmtVT)); + SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); + SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); + SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); + SDValue OutOps[] = { OutLo, OutHi }; + return DAG.getMergeValues(OutOps, 2, dl); +} + +SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + assert(Op.getNumOperands() == 3 && + VT == Op.getOperand(1).getValueType() && + "Unexpected SRA!"); + + // Expand into a bunch of logical ops, followed by a select_cc. 
+ SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Amt = Op.getOperand(2); + EVT AmtVT = Amt.getValueType(); + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, + DAG.getConstant(BitWidth, AmtVT), Amt); + SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, + DAG.getConstant(-BitWidth, AmtVT)); + SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); + SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); + SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), + Tmp4, Tmp6, ISD::SETLE); + SDValue OutOps[] = { OutLo, OutHi }; + return DAG.getMergeValues(OutOps, 2, dl); +} + +//===----------------------------------------------------------------------===// +// Vector related lowering. +// + +/// BuildSplatI - Build a canonical splati of Val with an element size of +/// SplatSize. Cast the result to VT. +static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, + SelectionDAG &DAG, DebugLoc dl) { + assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); + + static const EVT VTys[] = { // canonical VT to use for each size. + MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 + }; + + EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; + + // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. + if (Val == -1) + SplatSize = 1; + + EVT CanonicalVT = VTys[SplatSize-1]; + + // Build a canonical splat for this value. + SDValue Elt = DAG.getConstant(Val, MVT::i32); + SmallVector<SDValue, 8> Ops; + Ops.assign(CanonicalVT.getVectorNumElements(), Elt); + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, + &Ops[0], Ops.size()); + return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); +} + +/// BuildIntrinsicOp - Return a binary operator intrinsic node with the +/// specified intrinsic ID. 
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, + SelectionDAG &DAG, DebugLoc dl, + EVT DestVT = MVT::Other) { + if (DestVT == MVT::Other) DestVT = LHS.getValueType(); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(IID, MVT::i32), LHS, RHS); +} + +/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the +/// specified intrinsic ID. +static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, + SDValue Op2, SelectionDAG &DAG, + DebugLoc dl, EVT DestVT = MVT::Other) { + if (DestVT == MVT::Other) DestVT = Op0.getValueType(); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); +} + + +/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified +/// amount. The result has the specified value type. +static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, + EVT VT, SelectionDAG &DAG, DebugLoc dl) { + // Force LHS/RHS to be the right type. + LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); + RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); + + int Ops[16]; + for (unsigned i = 0; i != 16; ++i) + Ops[i] = i + Amt; + SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); + return DAG.getNode(ISD::BITCAST, dl, VT, T); +} + +// If this is a case we can't handle, return null and let the default +// expansion code take care of it. If we CAN select this case, and if it +// selects to a single instruction, return Op. Otherwise, if we can codegen +// this case more efficiently than a constant pool load, lower it to the +// sequence of ops that should be used. +SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); + assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + + // Check if this is a splat of a constant value. 
+ APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, 0, true) || SplatBitSize > 32) + return SDValue(); + + unsigned SplatBits = APSplatBits.getZExtValue(); + unsigned SplatUndef = APSplatUndef.getZExtValue(); + unsigned SplatSize = SplatBitSize / 8; + + // First, handle single instruction cases. + + // All zeros? + if (SplatBits == 0) { + // Canonicalize all zero vectors to be v4i32. + if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { + SDValue Z = DAG.getConstant(0, MVT::i32); + Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); + Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); + } + return Op; + } + + // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. + int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> + (32-SplatBitSize)); + if (SextVal >= -16 && SextVal <= 15) + return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); + + + // Two instruction sequences. + + // If this value is in the range [-32,30] and is even, use: + // tmp = VSPLTI[bhw], result = add tmp, tmp + if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { + SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); + Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + } + + // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is + // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important + // for fneg/fabs. + if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { + // Make -1 and vspltisw -1: + SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); + + // Make the VSLW intrinsic, computing 0x8000_0000. + SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, + OnesV, DAG, dl); + + // xor by OnesV to invert it. 
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + } + + // Check to see if this is a wide variety of vsplti*, binop self cases. + static const signed char SplatCsts[] = { + -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, + -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 + }; + + for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { + // Indirect through the SplatCsts array so that we favor 'vsplti -1' for + // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' + int i = SplatCsts[idx]; + + // Figure out what shift amount will be used by altivec if shifted by i in + // this splat size. + unsigned TypeShiftAmt = i & (SplatBitSize-1); + + // vsplti + shl self. + if (SextVal == (i << (int)TypeShiftAmt)) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, + Intrinsic::ppc_altivec_vslw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + } + + // vsplti + srl self. + if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, + Intrinsic::ppc_altivec_vsrw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + } + + // vsplti + sra self. + if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. 
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, + Intrinsic::ppc_altivec_vsraw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + } + + // vsplti + rol self. + if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | + ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, + Intrinsic::ppc_altivec_vrlw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + } + + // t = vsplti c, result = vsldoi t, t, 1 + if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); + } + // t = vsplti c, result = vsldoi t, t, 2 + if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); + } + // t = vsplti c, result = vsldoi t, t, 3 + if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); + } + } + + // Three instruction sequences. + + // Odd, in range [17,31]: (vsplti C)-(vsplti -16). + if (SextVal >= 0 && SextVal <= 31) { + SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); + SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); + LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); + } + // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 
+ if (SextVal >= -31 && SextVal <= 0) { + SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); + SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); + LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); + } + + return SDValue(); +} + +/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit +/// the specified operations to build the shuffle. +static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, + SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + unsigned OpNum = (PFEntry >> 26) & 0x0F; + unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); + unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); + + enum { + OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> + OP_VMRGHW, + OP_VMRGLW, + OP_VSPLTISW0, + OP_VSPLTISW1, + OP_VSPLTISW2, + OP_VSPLTISW3, + OP_VSLDOI4, + OP_VSLDOI8, + OP_VSLDOI12 + }; + + if (OpNum == OP_COPY) { + if (LHSID == (1*9+2)*9+3) return LHS; + assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); + return RHS; + } + + SDValue OpLHS, OpRHS; + OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); + OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); + + int ShufIdxs[16]; + switch (OpNum) { + default: llvm_unreachable("Unknown i32 permute!"); + case OP_VMRGHW: + ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; + ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; + ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; + ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; + break; + case OP_VMRGLW: + ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; + ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; + ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; + ShufIdxs[12] = 28; 
ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; + break; + case OP_VSPLTISW0: + for (unsigned i = 0; i != 16; ++i) + ShufIdxs[i] = (i&3)+0; + break; + case OP_VSPLTISW1: + for (unsigned i = 0; i != 16; ++i) + ShufIdxs[i] = (i&3)+4; + break; + case OP_VSPLTISW2: + for (unsigned i = 0; i != 16; ++i) + ShufIdxs[i] = (i&3)+8; + break; + case OP_VSPLTISW3: + for (unsigned i = 0; i != 16; ++i) + ShufIdxs[i] = (i&3)+12; + break; + case OP_VSLDOI4: + return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); + case OP_VSLDOI8: + return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); + case OP_VSLDOI12: + return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); + } + EVT VT = OpLHS.getValueType(); + OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); + OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); + SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); + return DAG.getNode(ISD::BITCAST, dl, VT, T); +} + +/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this +/// is a shuffle we can handle in a single instruction, return it. Otherwise, +/// return the code it can be lowered into. Worst case, it can always be +/// lowered into a vperm. +SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + EVT VT = Op.getValueType(); + + // Cases that are handled by instructions that take permute immediates + // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be + // selected by the instruction selector. 
+ if (V2.getOpcode() == ISD::UNDEF) { + if (PPC::isSplatShuffleMask(SVOp, 1) || + PPC::isSplatShuffleMask(SVOp, 2) || + PPC::isSplatShuffleMask(SVOp, 4) || + PPC::isVPKUWUMShuffleMask(SVOp, true) || + PPC::isVPKUHUMShuffleMask(SVOp, true) || + PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, true) || + PPC::isVMRGLShuffleMask(SVOp, 2, true) || + PPC::isVMRGLShuffleMask(SVOp, 4, true) || + PPC::isVMRGHShuffleMask(SVOp, 1, true) || + PPC::isVMRGHShuffleMask(SVOp, 2, true) || + PPC::isVMRGHShuffleMask(SVOp, 4, true)) { + return Op; + } + } + + // Altivec has a variety of "shuffle immediates" that take two vector inputs + // and produce a fixed permutation. If any of these match, do not lower to + // VPERM. + if (PPC::isVPKUWUMShuffleMask(SVOp, false) || + PPC::isVPKUHUMShuffleMask(SVOp, false) || + PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, false) || + PPC::isVMRGLShuffleMask(SVOp, 2, false) || + PPC::isVMRGLShuffleMask(SVOp, 4, false) || + PPC::isVMRGHShuffleMask(SVOp, 1, false) || + PPC::isVMRGHShuffleMask(SVOp, 2, false) || + PPC::isVMRGHShuffleMask(SVOp, 4, false)) + return Op; + + // Check to see if this is a shuffle of 4-byte values. If so, we can use our + // perfect shuffle table to emit an optimal matching sequence. + SmallVector<int, 16> PermMask; + SVOp->getMask(PermMask); + + unsigned PFIndexes[4]; + bool isFourElementShuffle = true; + for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number + unsigned EltNo = 8; // Start out undef. + for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. + if (PermMask[i*4+j] < 0) + continue; // Undef, ignore it. 
+ + unsigned ByteSource = PermMask[i*4+j]; + if ((ByteSource & 3) != j) { + isFourElementShuffle = false; + break; + } + + if (EltNo == 8) { + EltNo = ByteSource/4; + } else if (EltNo != ByteSource/4) { + isFourElementShuffle = false; + break; + } + } + PFIndexes[i] = EltNo; + } + + // If this shuffle can be expressed as a shuffle of 4-byte elements, use the + // perfect shuffle vector to determine if it is cost effective to do this as + // discrete instructions, or whether we should use a vperm. + if (isFourElementShuffle) { + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = + PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; + + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + // Determining when to avoid vperm is tricky. Many things affect the cost + // of vperm, particularly how many times the perm mask needs to be computed. + // For example, if the perm mask can be hoisted out of a loop or is already + // used (perhaps because there are multiple permutes with the same shuffle + // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of + // the loop requires an extra register. + // + // As a compromise, we only emit discrete instructions if the shuffle can be + // generated in 3 or fewer operations. When we have loop information + // available, if this block is within a loop, we should avoid using vperm + // for 3-operation perms and use a constant pool load instead. + if (Cost < 3) + return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); + } + + // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant + // vector that will get spilled to the constant pool. + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + + // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except + // that it is in input element units, not in bytes. Convert now. 
+ EVT EltVT = V1.getValueType().getVectorElementType(); + unsigned BytesPerElement = EltVT.getSizeInBits()/8; + + SmallVector<SDValue, 16> ResultMask; + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; + + for (unsigned j = 0; j != BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, + MVT::i32)); + } + + SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, + &ResultMask[0], ResultMask.size()); + return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); +} + +/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an +/// altivec comparison. If it is, return true and fill in Opc/isDot with +/// information about the intrinsic. +static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, + bool &isDot) { + unsigned IntrinsicID = + cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); + CompareOpc = -1; + isDot = false; + switch (IntrinsicID) { + default: return false; + // Comparison predicates. 
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; + + // Normal Comparisons. 
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; + } + return true; +} + +/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom +/// lower, do it, otherwise return null. +SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + // If this is a lowered altivec predicate compare, CompareOpc is set to the + // opcode number of the comparison. + DebugLoc dl = Op.getDebugLoc(); + int CompareOpc; + bool isDot; + if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) + return SDValue(); // Don't custom lower most intrinsics. + + // If this is a non-dot comparison, make the VCMP node and we are done. + if (!isDot) { + SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), + Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(CompareOpc, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); + } + + // Create the PPCISD altivec 'dot' comparison node. 
+ SDValue Ops[] = { + Op.getOperand(2), // LHS + Op.getOperand(3), // RHS + DAG.getConstant(CompareOpc, MVT::i32) + }; + std::vector<EVT> VTs; + VTs.push_back(Op.getOperand(2).getValueType()); + VTs.push_back(MVT::Glue); + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + + // Now that we have the comparison, emit a copy from the CR to a GPR. + // This is flagged to the above dot comparison. + SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, + DAG.getRegister(PPC::CR6, MVT::i32), + CompNode.getValue(1)); + + // Unpack the result based on how the target uses it. + unsigned BitNo; // Bit # of CR6. + bool InvertBit; // Invert result? + switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { + default: // Can't happen, don't crash on invalid number though. + case 0: // Return the value of the EQ bit of CR6. + BitNo = 0; InvertBit = false; + break; + case 1: // Return the inverted value of the EQ bit of CR6. + BitNo = 0; InvertBit = true; + break; + case 2: // Return the value of the LT bit of CR6. + BitNo = 2; InvertBit = false; + break; + case 3: // Return the inverted value of the LT bit of CR6. + BitNo = 2; InvertBit = true; + break; + } + + // Shift the bit into the low position. + Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, + DAG.getConstant(8-(3-BitNo), MVT::i32)); + // Isolate the bit. + Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, + DAG.getConstant(1, MVT::i32)); + + // If we are supposed to, toggle the bit. + if (InvertBit) + Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, + DAG.getConstant(1, MVT::i32)); + return Flags; +} + +SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + // Create a stack slot that is 16-byte aligned. 
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + EVT PtrVT = getPointerTy(); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + // Store the input value into Value#0 of the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, + Op.getOperand(0), FIdx, MachinePointerInfo(), + false, false, 0); + // Load it out. + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), + false, false, 0); +} + +SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + if (Op.getValueType() == MVT::v4i32) { + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); + + SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); + SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. + + SDValue RHSSwap = // = vrlw RHS, 16 + BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); + + // Shrinkify inputs to v8i16. + LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); + RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); + RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); + + // Low parts multiplied together, generating 32-bit results (we ignore the + // top parts). + SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, + LHS, RHS, DAG, dl, MVT::v4i32); + + SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, + LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); + // Shift the high parts up 16 bits. 
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, + Neg16, DAG, dl); + return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); + } else if (Op.getValueType() == MVT::v8i16) { + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); + + SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); + + return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, + LHS, RHS, Zero, DAG, dl); + } else if (Op.getValueType() == MVT::v16i8) { + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); + + // Multiply the even 8-bit parts, producing 16-bit sums. + SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, + LHS, RHS, DAG, dl, MVT::v8i16); + EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); + + // Multiply the odd 8-bit parts, producing 16-bit sums. + SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, + LHS, RHS, DAG, dl, MVT::v8i16); + OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); + + // Merge the results together. + int Ops[16]; + for (unsigned i = 0; i != 8; ++i) { + Ops[i*2 ] = 2*i+1; + Ops[i*2+1] = 2*i+1+16; + } + return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); + } else { + llvm_unreachable("Unknown mul to lower!"); + } +} + +/// LowerOperation - Provide custom lowering hooks for some operations. 
+/// +SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: llvm_unreachable("Wasn't expecting to be able to lower this!"); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG, PPCSubTarget); + + case ISD::VAARG: + return LowerVAARG(Op, DAG, PPCSubTarget); + + case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); + case ISD::DYNAMIC_STACKALLOC: + return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, + Op.getDebugLoc()); + case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + + // Lower 64-bit shifts. + case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); + case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); + case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); + + // Vector-related lowering. + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); + + // Frame & Return address. 
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + } + return SDValue(); +} + +void PPCTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const { + const TargetMachine &TM = getTargetMachine(); + DebugLoc dl = N->getDebugLoc(); + switch (N->getOpcode()) { + default: + assert(false && "Do not know how to custom type legalize this operation!"); + return; + case ISD::VAARG: { + if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() + || TM.getSubtarget<PPCSubtarget>().isPPC64()) + return; + + EVT VT = N->getValueType(0); + + if (VT == MVT::i64) { + SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); + + Results.push_back(NewNode); + Results.push_back(NewNode.getValue(1)); + } + return; + } + case ISD::FP_ROUND_INREG: { + assert(N->getValueType(0) == MVT::ppcf128); + assert(N->getOperand(0).getValueType() == MVT::ppcf128); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + MVT::f64, N->getOperand(0), + DAG.getIntPtrConstant(0)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + MVT::f64, N->getOperand(0), + DAG.getIntPtrConstant(1)); + + // This sequence changes FPSCR to do round-to-zero, adds the two halves + // of the long double, and puts FPSCR back the way it was. We do not + // actually model FPSCR. 
+ std::vector<EVT> NodeTys; + SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; + + NodeTys.push_back(MVT::f64); // Return register + NodeTys.push_back(MVT::Glue); // Returns a flag for later insns + Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); + MFFSreg = Result.getValue(0); + InFlag = Result.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::Glue); // Returns a flag + Ops[0] = DAG.getConstant(31, MVT::i32); + Ops[1] = InFlag; + Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); + InFlag = Result.getValue(0); + + NodeTys.clear(); + NodeTys.push_back(MVT::Glue); // Returns a flag + Ops[0] = DAG.getConstant(30, MVT::i32); + Ops[1] = InFlag; + Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); + InFlag = Result.getValue(0); + + NodeTys.clear(); + NodeTys.push_back(MVT::f64); // result of add + NodeTys.push_back(MVT::Glue); // Returns a flag + Ops[0] = Lo; + Ops[1] = Hi; + Ops[2] = InFlag; + Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); + FPreg = Result.getValue(0); + InFlag = Result.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::f64); + Ops[0] = DAG.getConstant(1, MVT::i32); + Ops[1] = MFFSreg; + Ops[2] = FPreg; + Ops[3] = InFlag; + Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); + FPreg = Result.getValue(0); + + // We know the low half is about to be thrown away, so just use something + // convenient. + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, + FPreg, FPreg)); + return; + } + case ISD::FP_TO_SINT: + Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); + return; + } +} + + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + bool is64bit, unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + unsigned incr = MI->getOperand(3).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned TmpReg = (!BinOpcode) ? incr : + RegInfo.createVirtualRegister( + is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : + (const TargetRegisterClass *) &PPC::GPRCRegClass); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // l[wd]arx dest, ptr + // add r0, dest, incr + // st[wd]cx. r0, ptr + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + .addReg(ptrA).addReg(ptrB); + if (BinOpcode) + BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + .addReg(TmpReg).addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + return BB; +} + +MachineBasicBlock * +PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, + MachineBasicBlock *BB, + bool is8bit, // operation + unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + // In 64 bit mode we have to use 64 bits for addresses, even though the + // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address + // registers without caring whether they're 32 or 64, but here we're + // doing actual arithmetic on the addresses. + bool is64bit = PPCSubTarget.isPPC64(); + unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + unsigned incr = MI->getOperand(3).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + const TargetRegisterClass *RC = + is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : + (const TargetRegisterClass *) &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); + unsigned ShiftReg = RegInfo.createVirtualRegister(RC); + unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); + unsigned MaskReg = RegInfo.createVirtualRegister(RC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); + unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); + unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); + unsigned Ptr1Reg; + unsigned TmpReg = (!BinOpcode) ? 
Incr2Reg : RegInfo.createVirtualRegister(RC); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // The 4-byte load must be aligned, while a char or short may be + // anywhere in the word. Hence all this nasty bookkeeping code. + // add ptr1, ptrA, ptrB [copy if ptrA==0] + // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] + // xori shift, shift1, 24 [16] + // rlwinm ptr, ptr1, 0, 0, 29 + // slw incr2, incr, shift + // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] + // slw mask, mask2, shift + // loopMBB: + // lwarx tmpDest, ptr + // add tmp, tmpDest, incr2 + // andc tmp2, tmpDest, mask + // and tmp3, tmp, mask + // or tmp4, tmp3, tmp2 + // stwcx. tmp4, ptr + // bne- loopMBB + // fallthrough --> exitMBB + // srw dest, tmpDest, shift + if (ptrA != ZeroReg) { + Ptr1Reg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) + .addReg(ptrA).addReg(ptrB); + } else { + Ptr1Reg = ptrB; + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); + BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) + .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); + if (is64bit) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(61); + else + BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); + BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) + .addReg(incr).addReg(ShiftReg); + if (is8bit) + BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); + else { + BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); + BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); + } + BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) + .addReg(Mask2Reg).addReg(ShiftReg); + + BB = loopMBB; + BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) + .addReg(ZeroReg).addReg(PtrReg); + if (BinOpcode) + BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) + .addReg(Incr2Reg).addReg(TmpDestReg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) + .addReg(TmpDestReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) + .addReg(TmpReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) + .addReg(Tmp3Reg).addReg(Tmp2Reg); + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) + .addReg(ShiftReg); + return BB; +} + +MachineBasicBlock * +PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // To "insert" these instructions we actually have to insert their + // control-flow patterns. 
+ const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + MachineFunction *F = BB->getParent(); + + if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_CC_F4 || + MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_VRRC) { + + // The incoming instruction knows the destination vreg to set, the + // condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + unsigned SelectPred = MI->getOperand(4).getImm(); + DebugLoc dl = MI->getDebugLoc(); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... 
+ BB = sinkMBB; + BuildMI(*BB, BB->begin(), dl, + TII->get(PPC::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + } + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::AND); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::OR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); + + else if (MI->getOpcode() == 
PPC::ATOMIC_LOAD_NAND_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8); + + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) + BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); + else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) + BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); + + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0); + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0); + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) + BB = EmitAtomicBinary(MI, BB, false, 0); + else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) + BB = EmitAtomicBinary(MI, BB, true, 0); + + else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { + bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + unsigned oldval = MI->getOperand(3).getReg(); + unsigned newval = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loop1MBB); 
+ F->insert(It, loop2MBB); + F->insert(It, midMBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loop1MBB); + + // loop1MBB: + // l[wd]arx dest, ptr + // cmp[wd] dest, oldval + // bne- midMBB + // loop2MBB: + // st[wd]cx. newval, ptr + // bne- loopMBB + // b exitBB + // midMBB: + // st[wd]cx. dest, ptr + // exitBB: + BB = loop1MBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + .addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) + .addReg(oldval).addReg(dest); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(midMBB); + + BB = loop2MBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + .addReg(newval).addReg(ptrA).addReg(ptrB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + BB = midMBB; + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + .addReg(dest).addReg(ptrA).addReg(ptrB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { + // We must use 64-bit registers for addresses when targeting 64-bit, + // since we're actually doing arithmetic on them. Other registers + // can be 32-bit. 
+ bool is64bit = PPCSubTarget.isPPC64(); + bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptrA = MI->getOperand(1).getReg(); + unsigned ptrB = MI->getOperand(2).getReg(); + unsigned oldval = MI->getOperand(3).getReg(); + unsigned newval = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loop1MBB); + F->insert(It, loop2MBB); + F->insert(It, midMBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + const TargetRegisterClass *RC = + is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : + (const TargetRegisterClass *) &PPC::GPRCRegClass; + unsigned PtrReg = RegInfo.createVirtualRegister(RC); + unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); + unsigned ShiftReg = RegInfo.createVirtualRegister(RC); + unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); + unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); + unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); + unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); + unsigned MaskReg = RegInfo.createVirtualRegister(RC); + unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); + unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); + unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); + unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); + unsigned Ptr1Reg; + unsigned TmpReg = RegInfo.createVirtualRegister(RC); + unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + // thisMBB: + // ... 
+ // fallthrough --> loopMBB + BB->addSuccessor(loop1MBB); + + // The 4-byte load must be aligned, while a char or short may be + // anywhere in the word. Hence all this nasty bookkeeping code. + // add ptr1, ptrA, ptrB [copy if ptrA==0] + // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] + // xori shift, shift1, 24 [16] + // rlwinm ptr, ptr1, 0, 0, 29 + // slw newval2, newval, shift + // slw oldval2, oldval,shift + // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] + // slw mask, mask2, shift + // and newval3, newval2, mask + // and oldval3, oldval2, mask + // loop1MBB: + // lwarx tmpDest, ptr + // and tmp, tmpDest, mask + // cmpw tmp, oldval3 + // bne- midMBB + // loop2MBB: + // andc tmp2, tmpDest, mask + // or tmp4, tmp2, newval3 + // stwcx. tmp4, ptr + // bne- loop1MBB + // b exitBB + // midMBB: + // stwcx. tmpDest, ptr + // exitBB: + // srw dest, tmpDest, shift + if (ptrA != ZeroReg) { + Ptr1Reg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) + .addReg(ptrA).addReg(ptrB); + } else { + Ptr1Reg = ptrB; + } + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); + BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) + .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); + if (is64bit) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(61); + else + BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) + .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); + BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) + .addReg(newval).addReg(ShiftReg); + BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) + .addReg(oldval).addReg(ShiftReg); + if (is8bit) + BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); + else { + BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); + BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) + .addReg(Mask3Reg).addImm(65535); + } + BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) + .addReg(Mask2Reg).addReg(ShiftReg); + BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) + .addReg(NewVal2Reg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) + .addReg(OldVal2Reg).addReg(MaskReg); + + BB = loop1MBB; + BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) + .addReg(ZeroReg).addReg(PtrReg); + BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) + .addReg(TmpDestReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) + .addReg(TmpReg).addReg(OldVal3Reg); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(midMBB); + + BB = loop2MBB; + BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) + .addReg(TmpDestReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) + .addReg(Tmp2Reg).addReg(NewVal3Reg); + BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) + .addReg(ZeroReg).addReg(PtrReg); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + BB = midMBB; + BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) + .addReg(ZeroReg).addReg(PtrReg); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... 
+ BB = exitMBB; + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) + .addReg(ShiftReg); + } else { + llvm_unreachable("Unexpected instr type to insert"); + } + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +//===----------------------------------------------------------------------===// +// Target Optimization Hooks +//===----------------------------------------------------------------------===// + +SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + const TargetMachine &TM = getTargetMachine(); + SelectionDAG &DAG = DCI.DAG; + DebugLoc dl = N->getDebugLoc(); + switch (N->getOpcode()) { + default: break; + case PPCISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { + if (C->isNullValue()) // 0 << V -> 0. + return N->getOperand(0); + } + break; + case PPCISD::SRL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { + if (C->isNullValue()) // 0 >>u V -> 0. + return N->getOperand(0); + } + break; + case PPCISD::SRA: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { + if (C->isNullValue() || // 0 >>s V -> 0. + C->isAllOnesValue()) // -1 >>s V -> -1. + return N->getOperand(0); + } + break; + + case ISD::SINT_TO_FP: + if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { + if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { + // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. + // We allow the src/dst to be either f32/f64, but the intermediate + // type must be i64. 
+ if (N->getOperand(0).getValueType() == MVT::i64 && + N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { + SDValue Val = N->getOperand(0).getOperand(0); + if (Val.getValueType() == MVT::f32) { + Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + } + + Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + if (N->getValueType(0) == MVT::f32) { + Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, + DAG.getIntPtrConstant(0)); + DCI.AddToWorklist(Val.getNode()); + } + return Val; + } else if (N->getOperand(0).getValueType() == MVT::i32) { + // If the intermediate type is i32, we can avoid the load/store here + // too. + } + } + } + break; + case ISD::STORE: + // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). + if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && + !cast<StoreSDNode>(N)->isTruncatingStore() && + N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && + N->getOperand(1).getValueType() == MVT::i32 && + N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { + SDValue Val = N->getOperand(1).getOperand(0); + if (Val.getValueType() == MVT::f32) { + Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + } + Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); + DCI.AddToWorklist(Val.getNode()); + + Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val, + N->getOperand(2), N->getOperand(3)); + DCI.AddToWorklist(Val.getNode()); + return Val; + } + + // Turn STORE (BSWAP) -> sthbrx/stwbrx. 
+ if (cast<StoreSDNode>(N)->isUnindexed() && + N->getOperand(1).getOpcode() == ISD::BSWAP && + N->getOperand(1).getNode()->hasOneUse() && + (N->getOperand(1).getValueType() == MVT::i32 || + N->getOperand(1).getValueType() == MVT::i16)) { + SDValue BSwapOp = N->getOperand(1).getOperand(0); + // Do an any-extend to 32-bits if this is a half-word input. + if (BSwapOp.getValueType() == MVT::i16) + BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); + + SDValue Ops[] = { + N->getOperand(0), BSwapOp, N->getOperand(2), + DAG.getValueType(N->getOperand(1).getValueType()) + }; + return + DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), + Ops, array_lengthof(Ops), + cast<StoreSDNode>(N)->getMemoryVT(), + cast<StoreSDNode>(N)->getMemOperand()); + } + break; + case ISD::BSWAP: + // Turn BSWAP (LOAD) -> lhbrx/lwbrx. + if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && + N->getOperand(0).hasOneUse() && + (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { + SDValue Load = N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(Load); + // Create the byte-swapping load. + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr(), // Ptr + DAG.getValueType(N->getValueType(0)) // VT + }; + SDValue BSLoad = + DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, + DAG.getVTList(MVT::i32, MVT::Other), Ops, 3, + LD->getMemoryVT(), LD->getMemOperand()); + + // If this is an i16 load, insert the truncate. + SDValue ResVal = BSLoad; + if (N->getValueType(0) == MVT::i16) + ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); + + // First, combine the bswap away. This makes the value produced by the + // load dead. + DCI.CombineTo(N, ResVal); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the bswap is dead. + DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); + + // Return N so it doesn't get rechecked! 
+ return SDValue(N, 0); + } + + break; + case PPCISD::VCMP: { + // If a VCMPo node already exists with exactly the same operands as this + // node, use its result instead of this node (VCMPo computes both a CR6 and + // a normal output). + // + if (!N->getOperand(0).hasOneUse() && + !N->getOperand(1).hasOneUse() && + !N->getOperand(2).hasOneUse()) { + + // Scan all of the users of the LHS, looking for VCMPo's that match. + SDNode *VCMPoNode = 0; + + SDNode *LHSN = N->getOperand(0).getNode(); + for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); + UI != E; ++UI) + if (UI->getOpcode() == PPCISD::VCMPo && + UI->getOperand(1) == N->getOperand(1) && + UI->getOperand(2) == N->getOperand(2) && + UI->getOperand(0) == N->getOperand(0)) { + VCMPoNode = *UI; + break; + } + + // If there is no VCMPo node, or if the flag value has a single use, don't + // transform this. + if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) + break; + + // Look at the (necessarily single) use of the flag value. If it has a + // chain, this transformation is more complex. Note that multiple things + // could use the value result, which we should ignore. + SDNode *FlagUser = 0; + for (SDNode::use_iterator UI = VCMPoNode->use_begin(); + FlagUser == 0; ++UI) { + assert(UI != VCMPoNode->use_end() && "Didn't find user!"); + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { + FlagUser = User; + break; + } + } + } + + // If the user is a MFCR instruction, we know this is safe. Otherwise we + // give up for right now. + if (FlagUser->getOpcode() == PPCISD::MFCR) + return SDValue(VCMPoNode, 0); + } + break; + } + case ISD::BR_CC: { + // If this is a branch on an altivec predicate comparison, lower this so + // that we don't have to do a MFCR: instead, branch directly on CR6. 
This + // lowering is done pre-legalize, because the legalizer lowers the predicate + // compare down to code that is difficult to reassemble. + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); + SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); + int CompareOpc; + bool isDot; + + if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && + getAltivecCompareInfo(LHS, CompareOpc, isDot)) { + assert(isDot && "Can't compare against a vector result!"); + + // If this is a comparison against something other than 0/1, then we know + // that the condition is never/always true. + unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); + if (Val != 0 && Val != 1) { + if (CC == ISD::SETEQ) // Cond never true, remove branch. + return N->getOperand(0); + // Always !=, turn it into an unconditional branch. + return DAG.getNode(ISD::BR, dl, MVT::Other, + N->getOperand(0), N->getOperand(4)); + } + + bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); + + // Create the PPCISD altivec 'dot' comparison node. + std::vector<EVT> VTs; + SDValue Ops[] = { + LHS.getOperand(2), // LHS of compare + LHS.getOperand(3), // RHS of compare + DAG.getConstant(CompareOpc, MVT::i32) + }; + VTs.push_back(LHS.getOperand(2).getValueType()); + VTs.push_back(MVT::Glue); + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + + // Unpack the result based on how the target uses it. + PPC::Predicate CompOpc; + switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { + default: // Can't happen, don't crash on invalid number though. + case 0: // Branch on the value of the EQ bit of CR6. + CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; + break; + case 1: // Branch on the inverted value of the EQ bit of CR6. + CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; + break; + case 2: // Branch on the value of the LT bit of CR6. + CompOpc = BranchOnWhenPredTrue ? 
PPC::PRED_LT : PPC::PRED_GE; + break; + case 3: // Branch on the inverted value of the LT bit of CR6. + CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; + break; + } + + return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), + DAG.getConstant(CompOpc, MVT::i32), + DAG.getRegister(PPC::CR6, MVT::i32), + N->getOperand(4), CompNode.getValue(1)); + } + break; + } + } + + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// computeMaskedBitsForTargetNode - Report which bits of the target node Op +/// are known to be zero. KnownZero and KnownOne are first reset to all-zero +/// values of Mask's bit width; after that only KnownZero is ever updated. +/// Mask is consulted only for its width, and DAG and Depth are not used. +void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + switch (Op.getOpcode()) { + default: break; + case PPCISD::LBRX: { + // lhbrx (an LBRX whose type operand is i16) is known to have the top bits + // cleared out, so bits 16-31 are reported as known zero. + if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) + KnownZero = 0xFFFF0000; + break; + } + case ISD::INTRINSIC_WO_CHAIN: { + // Operand 0 holds the intrinsic ID; only the AltiVec predicate compares + // listed below get known-bits information. + switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { + default: break; + case Intrinsic::ppc_altivec_vcmpbfp_p: + case Intrinsic::ppc_altivec_vcmpeqfp_p: + case Intrinsic::ppc_altivec_vcmpequb_p: + case Intrinsic::ppc_altivec_vcmpequh_p: + case Intrinsic::ppc_altivec_vcmpequw_p: + case Intrinsic::ppc_altivec_vcmpgefp_p: + case Intrinsic::ppc_altivec_vcmpgtfp_p: + case Intrinsic::ppc_altivec_vcmpgtsb_p: + case Intrinsic::ppc_altivec_vcmpgtsh_p: + case Intrinsic::ppc_altivec_vcmpgtsw_p: + case Intrinsic::ppc_altivec_vcmpgtub_p: + case Intrinsic::ppc_altivec_vcmpgtuh_p: + case Intrinsic::ppc_altivec_vcmpgtuw_p: + KnownZero = ~1U; // All bits but the low one are known to be zero. + break; + } + } + } +} + + +/// getConstraintType - Given a constraint, return the type of +/// constraint it is for this target.
+PPCTargetLowering::ConstraintType +PPCTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + // Single-letter constraints that name a PPC register class; everything + // else is classified by the generic implementation below. + switch (Constraint[0]) { + default: break; + case 'b': + case 'r': + case 'f': + case 'v': + case 'y': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +/// Returns CW_Default when the operand has no value, CW_Register when a +/// PPC-specific letter accepts the operand's type, CW_Invalid when it does +/// not, and the generic weight for any letter not listed in the switch. +TargetLowering::ConstraintWeight +PPCTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + Type *type = CallOperandVal->getType(); + // Look at the constraint type. Only the first character is examined; 'r' + // has no case of its own and therefore takes the generic path. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'b': // Integer operands only. + if (type->isIntegerTy()) + weight = CW_Register; + break; + case 'f': // Single-precision float operands only. + if (type->isFloatTy()) + weight = CW_Register; + break; + // NOTE(review): 'd' is weighted here, but neither getConstraintType nor + // getRegForInlineAsmConstraint in this file has a 'd' case - confirm + // whether that is intentional. + case 'd': // Double-precision float operands only. + if (type->isDoubleTy()) + weight = CW_Register; + break; + case 'v': // Vector operands only. + if (type->isVectorTy()) + weight = CW_Register; + break; + case 'y': // Accepted for any operand type. + weight = CW_Register; + break; + } + return weight; +} + +std::pair<unsigned, const TargetRegisterClass*> +PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + // GCC RS6000 Constraint Letters + switch (Constraint[0]) { + case 'b': // R1-R31 + case 'r': // R0-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, PPC::G8RCRegisterClass); + return std::make_pair(0U, PPC::GPRCRegisterClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, PPC::F4RCRegisterClass); +
else if (VT == MVT::f64) + return std::make_pair(0U, PPC::F8RCRegisterClass); + break; + case 'v': + return std::make_pair(0U, PPC::VRRCRegisterClass); + case 'y': // crrc + return std::make_pair(0U, PPC::CRRCRegisterClass); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue>&Ops, + SelectionDAG &DAG) const { + SDValue Result(0,0); + + // Only support length 1 constraints. + if (Constraint.length() > 1) return; + + char Letter = Constraint[0]; + switch (Letter) { + default: break; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': { + ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); + if (!CST) return; // Must be an immediate to match. + unsigned Value = CST->getZExtValue(); + switch (Letter) { + default: llvm_unreachable("Unknown constraint letter!"); + case 'I': // "I" is a signed 16-bit constant. + if ((short)Value == (int)Value) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'J': // "J" is a constant with only the high-order 16 bits nonzero. + case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. + if ((short)Value == 0) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'K': // "K" is a constant with only the low-order 16 bits nonzero. + if ((Value >> 16) == 0) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'M': // "M" is a constant that is greater than 31. + if (Value > 31) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'N': // "N" is a positive constant that is an exact power of two. 
+ if ((int)Value > 0 && isPowerOf2_32(Value)) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'O': // "O" is the constant zero. + if (Value == 0) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + case 'P': // "P" is a constant whose negation is a signed 16-bit constant. + if ((short)-Value == (int)-Value) + Result = DAG.getTargetConstant(Value, Op.getValueType()); + break; + } + break; + } + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + // Handle standard constraint letters. + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +// isLegalAddressingMode - Return true if the addressing mode represented +// by AM is legal for this target, for a load/store of the specified type. +// Only AM.BaseOffs, AM.BaseGV, AM.Scale and AM.HasBaseReg are examined; Ty is +// not consulted. +bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // FIXME: PPC does not allow r+i addressing modes for vectors! + + // PPC allows a sign-extended 16-bit immediate field. + // NOTE(review): the test below accepts offsets strictly between -65536 and + // 65535, which is wider than a signed 16-bit field (-32768..32767) - + // confirm whether 1LL << 15 was intended. + if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) + return false; + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // PPC only supports r+r and r+i, so check what the scale implies. + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + return false; + // Otherwise we have r+r or r+i. + break; + case 2: + if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + return false; + // Allow 2*r as r+r. + break; + default: + // No other scales are supported. + return false; + } + + return true; +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode for load / store of the +/// given type. +bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ + // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1); +} + +/// isLegalAddressImmediate - A GlobalValue is never accepted as an address +/// immediate by this implementation. +bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { + return false; +} + +/// LowerRETURNADDR - Lower a return-address query whose operand 0 is the +/// constant frame depth. Marks the return address as taken and requires the +/// LR store to be kept. For depth 0 the value is loaded from the +/// return-address frame index; for deeper frames it is loaded from the frame +/// address produced by LowerFRAMEADDR plus the return-save offset selected +/// by the isPPC64 / isDarwinABI subtarget flags. +SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + + // Make sure the function does not optimize away the store of the RA to + // the stack. + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setLRStoreRequired(); + bool isPPC64 = PPCSubTarget.isPPC64(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); + + if (Depth > 0) { + // Op is handed to LowerFRAMEADDR unchanged, so it reads the same depth + // operand and yields the frame address of the requested frame. + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = + + DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI), + isPPC64? MVT::i64 : MVT::i32); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, getPointerTy(), + FrameAddr, Offset), + MachinePointerInfo(), false, false, 0); + } + + // Just load the return address off the stack. + SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + RetAddrFI, MachinePointerInfo(), false, false, 0); +} + +SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + bool isPPC64 = PtrVT == MVT::i64; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && + MFI->getStackSize() && + !MF.getFunction()->hasFnAttr(Attribute::Naked); + unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : + (is31 ?
PPC::R31 : PPC::R1); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, + PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), + FrameAddr, MachinePointerInfo(), false, false, 0); + return FrameAddr; +} + +bool +PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The PowerPC target isn't yet aware of offsets, so folding is always + // refused regardless of GA. + return false; +} + +/// getOptimalMemOpType - Returns the target specific optimal type for load +/// and store operations as a result of memset, memcpy, and memmove +/// lowering. If DstAlign is zero, the destination alignment can satisfy any +/// constraint. Similarly, if SrcAlign is zero there is no need to check it +/// against an alignment requirement, probably because the source does not +/// need to be loaded. If 'NonScalarIntSafe' is true, it is safe to return a +/// non-scalar-integer type, e.g. empty string source, constant, or loaded +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if the type should be determined using generic +/// target-independent logic. +/// This implementation ignores all of its arguments: it returns MVT::i64 +/// when the subtarget is PPC64 and MVT::i32 otherwise. +EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, + bool MemcpyStrSrc, + MachineFunction &MF) const { + if (this->PPCSubTarget.isPPC64()) { + return MVT::i64; + } else { + return MVT::i32; + } +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h new file mode 100644 index 0000000..430e45e --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -0,0 +1,493 @@ +//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that PPC uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H +#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H + +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "PPC.h" +#include "PPCSubtarget.h" + +namespace llvm { + namespace PPCISD { + enum NodeType { + // Start the numbering where the builtin ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// FSEL - Traditional three-operand fsel node. + /// + FSEL, + + /// FCFID - The FCFID instruction, taking an f64 operand and producing + /// and f64 value containing the FP representation of the integer that + /// was temporarily in the f64 operand. + FCFID, + + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 + /// operand, producing an f64 value containing the integer representation + /// of that FP value. + FCTIDZ, FCTIWZ, + + /// STFIWX - The STFIWX instruction. The first operand is an input token + /// chain, then an f64 value to store, then an address to store it to. + STFIWX, + + // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking + // three v4f32 operands and producing a v4f32 result. + VMADDFP, VNMSUBFP, + + /// VPERM - The PPC VPERM Instruction. + /// + VPERM, + + /// Hi/Lo - These represent the high and low 16-bit parts of a global + /// address respectively. These nodes have two operands, the first of + /// which must be a TargetGlobalAddress, and the second of which must be a + /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', + /// though these are usually folded into other nodes. 
+ Hi, Lo, + + TOC_ENTRY, + + /// The following three target-specific nodes are used for calls through + /// function pointers in the 64-bit SVR4 ABI. + + /// Restore the TOC from the TOC save area of the current stack frame. + /// This is basically a hard coded load instruction which additionally + /// takes/produces a flag. + TOC_RESTORE, + + /// Like a regular LOAD but additionally taking/producing a flag. + LOAD, + + /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is + /// a hard coded load instruction. + LOAD_TOC, + + /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) + /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to + /// compute an allocation on the stack. + DYNALLOC, + + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr + /// at function entry, used for PIC code. + GlobalBaseReg, + + /// These nodes represent the 32-bit PPC shifts that operate on 6-bit + /// shift amounts. These nodes are generated by the multi-precision shift + /// code. + SRL, SRA, SHL, + + /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" + /// registers. + EXTSW_32, + + /// CALL - A direct function call. + CALL_Darwin, CALL_SVR4, + + /// NOP - Special NOP which follows 64-bit SVR4 calls. + NOP, + + /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a + /// MTCTR instruction. + MTCTR, + + /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a + /// BCTRL instruction. + BCTRL_Darwin, BCTRL_SVR4, + + /// Return with a flag operand, matched by 'blr' + RET_FLAG, + + /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF + /// instructions. This copies the bits corresponding to the specified + /// CRREG into the resultant GPR. Bits corresponding to other CR regs + /// are undefined. + MFCR, + + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* + /// instructions. 
For lack of better number, we use the opcode number + /// encoding for the OPC field to identify the compare. For example, 838 + /// is VCMPGTSH. + VCMP, + + /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the + /// altivec VCMP*o instructions. For lack of better number, we use the + /// opcode number encoding for the OPC field to identify the compare. For + /// example, 838 is VCMPGTSH. + VCMPo, + + /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This + /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the + /// condition register to branch on, OPC is the branch opcode to use (e.g. + /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is + /// an optional input flag argument. + /// NOTE(review): the BR_CC combine in PPCISelLowering.cpp builds this node + /// with the PPC::Predicate constant before the CR register operand - + /// confirm the operand order and the meaning of OPC stated above. + COND_BRANCH, + + // The following 5 instructions are used only as part of the + // long double-to-int conversion sequence. + + /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the + /// register. + MFFS, + + /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR. + MTFSB0, + + /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR. + MTFSB1, + + /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with + /// rounding towards zero. It has flags added so it won't move past the + /// FPSCR-setting instructions. + FADDRTZ, + + /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR. + MTFSF, + + /// LARX = This corresponds to PPC l{w|d}arx instruction: load and + /// reserve indexed. This is used to implement atomic operations. + LARX, + + /// STCX = This corresponds to PPC stcx. instruction: store conditional + /// indexed. This is used to implement atomic operations. + STCX, + + /// TC_RETURN - A tail call return. + /// operand #0 chain + /// operand #1 callee (register or absolute) + /// operand #2 stack adjustment + /// operand #3 optional in flag + TC_RETURN, + + /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, + + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a + /// byte-swapping store instruction. It byte-swaps the low "Type" bits of + /// the GPRC input, then stores it through Ptr. Type can be either i16 or + /// i32. + STBRX, + + /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a + /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, + /// then puts it in the bottom bits of the GPRC. Type can be either i16 + /// or i32. + LBRX + }; + } + + /// Define some predicates that are used for node matching. + namespace PPC { + /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUHUM instruction. + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + + /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUWUM instruction. + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + + /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for + /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). + bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); + + /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for + /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). + bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary); + + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift + /// amount, otherwise return -1. + int isVSLDOIShuffleMask(SDNode *N, bool isUnary); + + /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of a single element that is suitable for input to + /// VSPLTB/VSPLTH/VSPLTW. + bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); + + /// isAllNegativeZeroVector - Returns true if all elements of build_vector + /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N); + + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the + /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. + unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize); + + /// get_VSPLTI_elt - If this is a build_vector of constants which can be + /// formed by using a vspltis[bhw] instruction of the specified element + /// size, return the constant being splatted. The ByteSize field indicates + /// the number of bytes of each element [124] -> [bhw]. + SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + } + + class PPCTargetLowering : public TargetLowering { + const PPCSubtarget &PPCSubTarget; + + public: + explicit PPCTargetLowering(PPCTargetMachine &TM); + + /// getTargetNodeName() - This method returns the name of a target specific + /// DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + + /// getSetCCResultType - Return the ISD::SETCC ValueType + virtual EVT getSetCCResultType(EVT VT) const; + + /// getPreIndexedAddressParts - returns true by value, base pointer and + /// offset pointer and addressing mode by reference if the node's address + /// can be legally represented as pre-indexed load / store address. + virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + /// SelectAddressRegReg - Given the specified addressed, check to see if it + /// can be represented as an indexed [r+r] operation. Returns false if it + /// can be more efficiently represented with [r+imm]. + bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + + /// SelectAddressRegImm - Returns true if the address N can be represented + /// by a base register plus a signed 16-bit displacement [r+imm], and if it + /// is not better represented as reg+reg. 
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, + SelectionDAG &DAG) const; + + /// SelectAddressRegRegOnly - Given the specified addressed, force it to be + /// represented as an indexed [r+r] operation. + bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + + /// SelectAddressRegImmShift - Returns true if the address N can be + /// represented by a base register plus a signed 14-bit displacement + /// [r+imm*4]. Suitable for use by STD and friends. + bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, + SelectionDAG &DAG) const; + + + /// LowerOperation - Provide custom lowering hooks for some operations. + /// + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual void computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, + MachineBasicBlock *MBB, bool is64Bit, + unsigned BinOpcode) const; + MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, + MachineBasicBlock *MBB, + bool is8bit, unsigned Opcode) const; + + ConstraintType getConstraintType(const std::string &Constraint) const; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. 
+ ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate + /// function arguments in the caller parameter area. This is the actual + /// alignment, not its logarithm. + unsigned getByValTypeAlignment(Type *Ty) const; + + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. + virtual void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; + + /// isLegalAddressImmediate - Return true if the integer value can be used + /// as the offset of the target addressing mode for load / store of the + /// given type. + virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; + + /// isLegalAddressImmediate - Return true if the GlobalValue can be used as + /// the offset of the target addressing mode. + virtual bool isLegalAddressImmediate(GlobalValue *GV) const; + + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// getOptimalMemOpType - Returns the target specific optimal type for load + /// and store operations as a result of memset, memcpy, and memmove + /// lowering. If DstAlign is zero, the destination alignment can satisfy + /// any constraint. Similarly if SrcAlign is zero it + /// means there isn't a need to check it against alignment requirement, + /// probably because the source does not need to be loaded.
If + /// 'NonScalarIntSafe' is true, that means it's safe to return a + /// non-scalar-integer type, e.g. empty string source, constant, or loaded + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. + virtual EVT + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + MachineFunction &MF) const; + + private: + SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; + SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; + + bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + + SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, + int SPDiff, + SDValue Chain, + SDValue &LROpOut, + SDValue &FPOpOut, + bool isDarwinABI, + DebugLoc dl) const; + + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; + SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget 
&Subtarget) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall, + bool isVarArg, + SelectionDAG &DAG, + SmallVector<std::pair<unsigned, SDValue>, 8> + &RegsToPass, + SDValue InFlag, SDValue Chain, + SDValue &Callee, + int SPDiff, unsigned NumBytes, + const SmallVectorImpl<ISD::InputArg> &Ins, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual bool + CanLowerReturn(CallingConv::ID 
CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + SDValue + LowerFormalArguments_Darwin(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue + LowerFormalArguments_SVR4(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue + LowerCall_Darwin(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue + LowerCall_SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + }; +} + +#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td new file mode 100644 index 0000000..e88ad37 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -0,0 +1,749 @@ +//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PowerPC 64-bit instructions. These patterns are used +// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 64-bit operands. +// +def s16imm64 : Operand<i64> { + let PrintMethod = "printS16ImmOperand"; +} +def u16imm64 : Operand<i64> { + let PrintMethod = "printU16ImmOperand"; +} +def symbolHi64 : Operand<i64> { + let PrintMethod = "printSymbolHi"; + let EncoderMethod = "getHA16Encoding"; +} +def symbolLo64 : Operand<i64> { + let PrintMethod = "printSymbolLo"; + let EncoderMethod = "getLO16Encoding"; +} + +//===----------------------------------------------------------------------===// +// 64-bit transformation functions. +// + +def SHL64 : SDNodeXForm<imm, [{ + // Transformation function: 63 - imm + return getI32Imm(63 - N->getZExtValue()); +}]>; + +def SRL64 : SDNodeXForm<imm, [{ + // Transformation function: 64 - imm + return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0); +}]>; + +def HI32_48 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return getI32Imm((unsigned short)(N->getZExtValue() >> 32)); +}]>; + +def HI48_64 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return getI32Imm((unsigned short)(N->getZExtValue() >> 48)); +}]>; + + +//===----------------------------------------------------------------------===// +// Calls. +// + +let Defs = [LR8] in + def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>, + PPC970_Unit_BRU; + +// Darwin ABI Calls. +let isCall = 1, PPC970_Unit = 7, + // All calls clobber the PPC64 non-callee saved registers. 
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, + V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, + LR8,CTR8, + CR0,CR1,CR5,CR6,CR7,CARRY] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL8_Darwin : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA8_Darwin : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>; + } + let Uses = [CTR8, RM] in { + def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>; + } +} + +// ELF 64 ABI Calls = Darwin ABI Calls +// Used to define BL8_ELF and BLA8_ELF +let isCall = 1, PPC970_Unit = 7, + // All calls clobber the PPC64 non-callee saved registers. + Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, + V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, + LR8,CTR8, + CR0,CR1,CR5,CR6,CR7,CARRY] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL8_ELF : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. 
+ def BLA8_ELF : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; + } + let Uses = [CTR8, RM] in { + def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>; + } +} + + +// Calls +def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)), + (BL8_Darwin tglobaladdr:$dst)>; +def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), + (BL8_Darwin texternalsym:$dst)>; + +def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), + (BL8_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), + (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCnop), + (NOP)>; + +// Atomic operations +let usesCustomInserter = 1 in { + let Defs = [CR0] in { + def ATOMIC_LOAD_ADD_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_SUB_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_OR_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_XOR_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_AND_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>; + def ATOMIC_LOAD_NAND_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", + [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>; + + def ATOMIC_CMP_SWAP_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "", + [(set G8RC:$dst, + (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>; + + def ATOMIC_SWAP_I64 : Pseudo< + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "", + 
[(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; + } +} + +// Instructions to support atomic operations +def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr), + "ldarx $rD, $ptr", LdStLDARX, + [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>; + +let Defs = [CR0] in +def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst), + "stdcx. $rS, $dst", LdStSTDCX, + [(PPCstcx G8RC:$rS, xoaddr:$dst)]>, + isDOT; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNdi8 :Pseudo< (outs), + (ins calltarget:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNd8 $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops), + "#TC_RETURNa8 $func $offset", + [(PPCtc_return (i64 imm:$func), imm:$offset)]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops), + "#TC_RETURNr8 $dst $offset", + []>; + + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, + isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in { + let isReturn = 1 in { + def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; + } + + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; +} + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), + "b $dst", BrB, + []>; + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), + "ba $dst", BrB, + []>; + +def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), + (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; 
+ +def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), + (TCRETURNdi8 texternalsym:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), + (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; + + +//===----------------------------------------------------------------------===// +// 64-bit SPR manipulation instrs. + +let Uses = [CTR8] in { +def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins), + "mfctr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in { +def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), + "mtctr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +let Defs = [X1], Uses = [X1] in +def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", + [(set G8RC:$result, + (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; + +let Defs = [LR8] in { +def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS), + "mtlr $rS", SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Uses = [LR8] in { +def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), + "mflr $rT", SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +//===----------------------------------------------------------------------===// +// Fixed point instructions. +// + +let PPC970_Unit = 1 in { // FXU Operations. + +// Copies, extends, truncates. +def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB), + "or $rA, $rS, $rB", IntGeneral, + []>; +def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB), + "or $rA, $rS, $rB", IntGeneral, + []>; + +def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), + "li $rD, $imm", IntGeneral, + [(set G8RC:$rD, immSExt16:$imm)]>; +def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), + "lis $rD, $imm", IntGeneral, + [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; + +// Logical ops. 
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "nand $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>; +def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "and $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>; +def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "andc $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>; +def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "or $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>; +def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "nor $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>; +def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "orc $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>; +def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "eqv $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>; +def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), + "xor $rA, $rS, $rB", IntGeneral, + [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>; + +// Logical ops with immediate. +def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "andi. $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>, + isDOT; +def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "andis. 
$dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>, + isDOT; +def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "ori $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>; +def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "oris $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>; +def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "xori $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>; +def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), + "xoris $dst, $src1, $src2", IntGeneral, + [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>; + +def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "add $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; + +let Defs = [CARRY] in { +def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "addc $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>, + PPC970_DGroup_Cracked; +def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), + "addic $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; +} +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), + "addi $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + "addis $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + +let Defs = [CARRY] in { +def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), + "subfic $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>; +def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subfc $rT, $rA, $rB", IntGeneral, + 
[(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>, + PPC970_DGroup_Cracked; +} +def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subf $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; +def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "neg $rT, $rA", IntGeneral, + [(set G8RC:$rT, (ineg G8RC:$rA))]>; +let Uses = [CARRY], Defs = [CARRY] in { +def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "adde $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; +def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "addme $rT, $rA", IntGeneral, + [(set G8RC:$rT, (adde G8RC:$rA, -1))]>; +def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "addze $rT, $rA", IntGeneral, + [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; +def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subfe $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; +def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "subfme $rT, $rA", IntGeneral, + [(set G8RC:$rT, (sube -1, G8RC:$rA))]>; +def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "subfze $rT, $rA", IntGeneral, + [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; +} + + +def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "mulhd $rT, $rA, $rB", IntMulHW, + [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>; +def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "mulhdu $rT, $rA, $rB", IntMulHWU, + [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>; + +def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), + "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; +def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), + "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; +def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm), + "cmpdi $crD, $rA, $imm", IntCompare>, 
isPPC64; +def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2), + "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; + +def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), + "sld $rA, $rS, $rB", IntRotateD, + [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64; +def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), + "srd $rA, $rS, $rB", IntRotateD, + [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64; +let Defs = [CARRY] in { +def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), + "srad $rA, $rS, $rB", IntRotateD, + [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64; +} + +def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), + "extsb $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>; +def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), + "extsh $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>; + +def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), + "extsw $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64; +/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers. 
+def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS), + "extsw $rA, $rS", IntGeneral, + [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64; +def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), + "extsw $rA, $rS", IntGeneral, + [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64; + +let Defs = [CARRY] in { +def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), + "sradi $rA, $rS, $SH", IntRotateD, + [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; +} +def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), + "cntlzd $rA, $rS", IntGeneral, + [(set G8RC:$rA, (ctlz G8RC:$rS))]>; + +def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "divd $rT, $rA, $rB", IntDivD, + [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "divdu $rT, $rA, $rB", IntDivD, + [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "mulld $rT, $rA, $rB", IntMulHD, + [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64; + + +let isCommutable = 1 in { +def RLDIMI : MDForm_1<30, 3, + (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), + "rldimi $rA, $rS, $SH, $MB", IntRotateD, + []>, isPPC64, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; +} + +// Rotate instructions. +def RLDCL : MDForm_1<30, 0, + (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB), + "rldcl $rA, $rS, $rB, $MB", IntRotateD, + []>, isPPC64; +def RLDICL : MDForm_1<30, 0, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), + "rldicl $rA, $rS, $SH, $MB", IntRotateD, + []>, isPPC64; +def RLDICR : MDForm_1<30, 1, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), + "rldicr $rA, $rS, $SH, $ME", IntRotateD, + []>, isPPC64; +} // End FXU Operations. 
+ + +//===----------------------------------------------------------------------===// +// Load/Store instructions. +// + + +// Sign extending loads. +let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), + "lha $rD, $src", LdStLHA, + [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>, + PPC970_DGroup_Cracked; +def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), + "lwa $rD, $src", LdStLWA, + [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + PPC970_DGroup_Cracked; +def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), + "lhax $rD, $src", LdStLHA, + [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>, + PPC970_DGroup_Cracked; +def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), + "lwax $rD, $src", LdStLHA, + [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, + PPC970_DGroup_Cracked; + +// Update forms. +let mayLoad = 1 in +def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, + ptr_rc:$rA), + "lhau $rD, $disp($rA)", LdStGeneral, + []>, RegConstraint<"$rA = $ea_result">, + NoEncode<"$ea_result">; +// NO LWAU! + +} + +// Zero extending loads. 
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), + "lbz $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; +def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), + "lhz $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; +def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), + "lwz $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + +def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), + "lbzx $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; +def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), + "lhzx $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; +def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), + "lwzx $rD, $src", LdStGeneral, + [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; + + +// Update forms. +let mayLoad = 1 in { +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lbzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lhzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), + "lwzu $rD, $addr", LdStGeneral, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +} +} + + +// Full 8-byte loads. +let canFoldAsLoad = 1, PPC970_Unit = 2 in { +def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), + "ld $rD, $src", LdStLD, + [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; +def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; + +let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. 
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg), + "ld 2, 8($reg)", LdStLD, + [(PPCload_toc G8RC:$reg)]>, isPPC64; + +let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. +def LDtoc_restore : DSForm_1<58, 0, (outs), (ins), + "ld 2, 40(1)", LdStLD, + [(PPCtoc_restore)]>, isPPC64; +def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), + "ldx $rD, $src", LdStLD, + [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; + +let mayLoad = 1 in +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), + "ldu $rD, $addr", LdStLD, + []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, + NoEncode<"$ea_result">; + +} + +def : Pat<(PPCload ixaddr:$src), + (LD ixaddr:$src)>; +def : Pat<(PPCload xaddr:$src), + (LDX xaddr:$src)>; + +let PPC970_Unit = 2 in { +// Truncating stores. +def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), + "stb $rS, $src", LdStGeneral, + [(truncstorei8 G8RC:$rS, iaddr:$src)]>; +def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), + "sth $rS, $src", LdStGeneral, + [(truncstorei16 G8RC:$rS, iaddr:$src)]>; +def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), + "stw $rS, $src", LdStGeneral, + [(truncstorei32 G8RC:$rS, iaddr:$src)]>; +def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), + "stbx $rS, $dst", LdStGeneral, + [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), + "sthx $rS, $dst", LdStGeneral, + [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), + "stwx $rS, $dst", LdStGeneral, + [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +// Normal 8-byte stores. 
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), + "std $rS, $dst", LdStSTD, + [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; +def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), + "stdx $rS, $dst", LdStSTD, + [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +} + +let PPC970_Unit = 2 in { + +// Update-form (pre-increment) stores: $ea_res is tied to $ptrreg and is not encoded. +// stbu is primary opcode 39; 38 is the non-update stb used by STB8. +def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, + (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + [(set ptr_rc:$ea_res, + (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; + +def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + s16immX4:$ptroff, ptr_rc:$ptrreg), + "stdu $rS, $ptroff($ptrreg)", LdStSTD, + [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + +let mayStore = 1 in +def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTD, + []>, isPPC64; + +// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. +def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), + "std $rT, $dst", LdStSTD, + [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64; +def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), + "stdx $rT, $dst", LdStSTD, + [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +} + + + +//===----------------------------------------------------------------------===// +// Floating point instructions. +// + + +let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations. 
+def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), + "fcfid $frD, $frB", FPGeneral, + [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64; +def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), + "fctidz $frD, $frB", FPGeneral, + [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64; +} + + +//===----------------------------------------------------------------------===// +// Instruction Patterns +// + +// Extensions and truncates to/from 32-bit regs. +def : Pat<(i64 (zext GPRC:$in)), + (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>; +def : Pat<(i64 (anyext GPRC:$in)), + (OR4To8 GPRC:$in, GPRC:$in)>; +def : Pat<(i32 (trunc G8RC:$in)), + (OR8To4 G8RC:$in, G8RC:$in)>; + +// Extending loads with i64 targets. +def : Pat<(zextloadi1 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(zextloadi1 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi1 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), + (LHZ8 iaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), + (LHZX8 xaddr:$src)>; +def : Pat<(extloadi32 iaddr:$src), + (LWZ8 iaddr:$src)>; +def : Pat<(extloadi32 xaddr:$src), + (LWZX8 xaddr:$src)>; + +// Standard shifts. These are represented separately from the real shifts above +// so that we can distinguish between shifts that allow 6-bit and 7-bit shift +// amounts. 
+def : Pat<(sra G8RC:$rS, GPRC:$rB), + (SRAD G8RC:$rS, GPRC:$rB)>; +def : Pat<(srl G8RC:$rS, GPRC:$rB), + (SRD G8RC:$rS, GPRC:$rB)>; +def : Pat<(shl G8RC:$rS, GPRC:$rB), + (SLD G8RC:$rS, GPRC:$rB)>; + +// SHL/SRL +def : Pat<(shl G8RC:$in, (i32 imm:$imm)), + (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>; +def : Pat<(srl G8RC:$in, (i32 imm:$imm)), + (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>; + +// ROTL +def : Pat<(rotl G8RC:$in, GPRC:$sh), + (RLDCL G8RC:$in, GPRC:$sh, 0)>; +def : Pat<(rotl G8RC:$in, (i32 imm:$imm)), + (RLDICL G8RC:$in, imm:$imm, 0)>; + +// Hi and Lo for Darwin Global Addresses. +def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>; +def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>; +def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>; +def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>; +def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>; +def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>; +def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>; +def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>; +def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS8 G8RC:$in, tglobaladdr:$g)>; +def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)), + (ADDIS8 G8RC:$in, tconstpool:$g)>; +def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), + (ADDIS8 G8RC:$in, tjumptable:$g)>; +def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS8 G8RC:$in, tblockaddress:$g)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td new file mode 100644 index 0000000..256370f --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -0,0 +1,695 @@ +//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes the Altivec extension to the PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Altivec transformation functions and pattern fragments. +// + +// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be +// of that type. +def vnot_ppc : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>; + +def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); +}]>; +def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); +}]>; +def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); +}]>; +def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); +}]>; + + +def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); +}]>; +def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); +}]>; +def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); +}]>; +def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 
node:$lhs), node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); +}]>; +def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); +}]>; +def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); +}]>; + + +def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); +}]>; +def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); +}]>; +def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); +}]>; +def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); +}]>; +def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); +}]>; +def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); +}]>; + + +def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::isVSLDOIShuffleMask(N, false)); +}]>; +def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVSLDOIShuffleMask(N, false) != -1; +}], VSLDOI_get_imm>; + + +/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into 
+/// vector_shuffle(X,undef,mask) by the dag combiner. +def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::isVSLDOIShuffleMask(N, true)); +}]>; +def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVSLDOIShuffleMask(N, true) != -1; +}], VSLDOI_unary_get_imm>; + + +// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. +def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::getVSPLTImmediate(N, 1)); +}]>; +def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1); +}], VSPLTB_get_imm>; +def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::getVSPLTImmediate(N, 2)); +}]>; +def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2); +}], VSPLTH_get_imm>; +def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::getVSPLTImmediate(N, 4)); +}]>; +def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4); +}], VSPLTW_get_imm>; + + +// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm. +def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{ + return PPC::get_VSPLTI_elt(N, 1, *CurDAG); +}]>; +def vecspltisb : PatLeaf<(build_vector), [{ + return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0; +}], VSPLTISB_get_imm>; + +// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm. 
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{ + return PPC::get_VSPLTI_elt(N, 2, *CurDAG); +}]>; +def vecspltish : PatLeaf<(build_vector), [{ + return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0; +}], VSPLTISH_get_imm>; + +// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm. +def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{ + return PPC::get_VSPLTI_elt(N, 4, *CurDAG); +}]>; +def vecspltisw : PatLeaf<(build_vector), [{ + return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0; +}], VSPLTISW_get_imm>; + +def V_immneg0 : PatLeaf<(build_vector), [{ + return PPC::isAllNegativeZeroVector(N); +}]>; + +//===----------------------------------------------------------------------===// +// Helpers for defining instructions that directly correspond to intrinsics. + +// VA1a_Int - A VAForm_1a intrinsic definition. +class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>; + +// VX1_Int - A VXForm_1 intrinsic definition. +class VX1_Int<bits<11> xo, string opc, Intrinsic IntID> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>; + +// VX2_Int - A VXForm_2 intrinsic definition. +class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> + : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + !strconcat(opc, " $vD, $vB"), VecFP, + [(set VRRC:$vD, (IntID VRRC:$vB))]>; + +//===----------------------------------------------------------------------===// +// Instruction Definitions. 
+ +def DSS : DSS_Form<822, (outs), + (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), + "dss $STRM", LdStGeneral /*FIXME*/, []>; +def DSSALL : DSS_Form<822, (outs), + (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), + "dssall", LdStGeneral /*FIXME*/, []>; +def DST : DSS_Form<342, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTT : DSS_Form<342, (outs), + (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTST : DSS_Form<374, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTSTT : DSS_Form<374, (outs), + (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + +def DST64 : DSS_Form<342, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTT64 : DSS_Form<342, (outs), + (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTST64 : DSS_Form<374, (outs), + (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; +def DSTSTT64 : DSS_Form<374, (outs), + (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + +def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), + "mfvscr $vD", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; +def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), + "mtvscr $vB", LdStGeneral, + [(int_ppc_altivec_mtvscr VRRC:$vB)]>; + +let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
+def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), + "lvebx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; +def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), + "lvehx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; +def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), + "lvewx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; +def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), + "lvx $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; +def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), + "lvxl $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; +} + +def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), + "lvsl $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + PPC970_Unit_LSU; +def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), + "lvsr $vD, $src", LdStGeneral, + [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + PPC970_Unit_LSU; + +let PPC970_Unit = 2 in { // Stores. +def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), + "stvebx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; +def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), + "stvehx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; +def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), + "stvewx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; +def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), + "stvx $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; +def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), + "stvxl $rS, $dst", LdStGeneral, + [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; +} + +let PPC970_Unit = 5 in { // VALU Operations. +// VA-Form instructions. 
3-input AltiVec ops. +def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), + "vmaddfp $vD, $vA, $vC, $vB", VecFP, + [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC), + VRRC:$vB))]>, + Requires<[FPContractions]>; +def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), + "vnmsubfp $vD, $vA, $vC, $vB", VecFP, + [(set VRRC:$vD, (fsub V_immneg0, + (fsub (fmul VRRC:$vA, VRRC:$vC), + VRRC:$vB)))]>, + Requires<[FPContractions]>; + +def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; +def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; +def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>; +def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>; +def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>; + +// Shuffles. +def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), + "vsldoi $vD, $vA, $vB, $SH", VecFP, + [(set VRRC:$vD, + (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>; + +// VX-Form instructions. AltiVec arithmetic ops. 
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vaddfp $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>; + +def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vaddubm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>; +def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vadduhm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>; +def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vadduwm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>; + +def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>; +def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>; +def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>; +def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>; +def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>; +def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>; +def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>; + + +def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vand $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>; +def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vandc $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (and (v4i32 VRRC:$vA), + (vnot_ppc VRRC:$vB)))]>; + +def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vcfsx $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>; +def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vcfux $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>; +def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vctsxs $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>; +def VCTUXS : VXForm_1<906, (outs 
VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vctuxs $vD, $vB, $UIMM", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>; +def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>; +def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>; + +def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>; +def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>; +def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>; +def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>; +def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>; +def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>; + +def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>; +def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>; +def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>; +def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>; +def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>; +def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>; +def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>; +def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>; +def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>; +def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>; +def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>; +def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>; +def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>; +def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>; + +def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vmrghb $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>; +def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vmrghh $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>; +def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vmrghw $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, 
(vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>; +def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vmrglb $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>; +def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vmrglh $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>; +def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vmrglw $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>; + +def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; +def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; +def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>; +def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>; +def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>; +def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>; + +def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>; +def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>; +def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>; +def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>; +def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>; +def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>; +def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>; +def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>; + +def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>; +def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>; +def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>; +def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>; +def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>; +def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; + +def VSUBCUW : VX1_Int<1408, "vsubcuw", int_ppc_altivec_vsubcuw>; /* XO 1408; was 74, which is the extended opcode of VSUBFP below */ + +def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vsubfp $vD, $vA, $vB", 
VecGeneral, + [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>; +def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vsububm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>; +def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vsubuhm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>; +def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vsubuwm $vD, $vA, $vB", VecGeneral, + [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>; + +def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>; +def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>; +def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>; +def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>; +def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>; +def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>; +def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>; +def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>; +def VSUM4SBS: VX1_Int<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs>; /* XO 1800; was 1672, duplicating VSUM2SWS above */ +def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>; +def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>; + +def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vnor $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA), + VRRC:$vB)))]>; +def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vor $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>; +def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vxor $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>; + +def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>; +def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>; +def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>; + +def VSL : VX1_Int< 452, "vsl" , 
int_ppc_altivec_vsl >; +def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>; +def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>; +def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>; +def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>; + +def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vspltb $vD, $vB, $UIMM", VecPerm, + [(set VRRC:$vD, + (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; +def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vsplth $vD, $vB, $UIMM", VecPerm, + [(set VRRC:$vD, + (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; +def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), + "vspltw $vD, $vB, $UIMM", VecPerm, + [(set VRRC:$vD, + (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + +def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; +def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; +def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>; +def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>; +def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>; +def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>; +def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>; +def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>; + + +def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM), + "vspltisb $vD, $SIMM", VecPerm, + [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>; +def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM), + "vspltish $vD, $SIMM", VecPerm, + [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>; +def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM), + "vspltisw $vD, $SIMM", VecPerm, + [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>; + +// Vector Pack. 
+def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>; +def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>; +def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>; +def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>; +def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; +def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vpkuhum $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, + (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; +def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; +def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + "vpkuwum $vD, $vA, $vB", VecFP, + [(set VRRC:$vD, + (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; +def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; + +// Vector Unpack. +def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>; +def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>; +def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>; +def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>; +def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>; +def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>; + + +// Altivec Comparisons. + +class VCMP<bits<10> xo, string asmstr, ValueType Ty> /* compare selected via PPCvcmp; xo is used both as the encoding field and as the node's third operand */ + : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, + [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>; +class VCMPo<bits<10> xo, string asmstr, ValueType Ty> /* same shape as VCMP but matches PPCvcmp_o, sets RC = 1 and lists CR6 in Defs */ + : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, + [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> { + let Defs = [CR6]; + let RC = 1; +} + +// f32 element comparisons. +def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>; +def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>; +def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>; +def VCMPEQFPo : VCMPo<198, "vcmpeqfp. 
$vD, $vA, $vB", v4f32>; +def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>; +def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>; +def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>; +def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>; + +// i8 element comparisons. +def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>; +def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>; +def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>; +def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>; +def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>; +def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>; + +// i16 element comparisons. +def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>; +def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>; +def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>; +def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>; +def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>; +def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>; + +// i32 element comparisons. +def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>; +def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>; +def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>; +def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; +def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; +def VCMPGTUWo : VCMPo<646, "vcmpgtuw. 
$vD, $vA, $vB", v4i32>; + +def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), + "vxor $vD, $vD, $vD", VecFP, + [(set VRRC:$vD, (v4i32 immAllZerosV))]>; +} + +//===----------------------------------------------------------------------===// +// Additional Altivec Patterns +// + +// DS* intrinsics +def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>; +def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>; + +// * 32-bit +def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM), + (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM), + (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM), + (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM), + (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; + +// * 64-bit +def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM), + (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM), + (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM), + (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM), + (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; + +// Loads. +def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; + +// Stores. +def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst), + (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>; + +// Bit conversions. 
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>; + +def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>; + +def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>; + +def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; + +// Shuffles. + +// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) +def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef), + (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>; +def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef), + (VPKUWUM VRRC:$vA, VRRC:$vA)>; +def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef), + (VPKUHUM VRRC:$vA, VRRC:$vA)>; + +// Match vmrg*(x,x) +def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef), + (VMRGLB VRRC:$vA, VRRC:$vA)>; +def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef), + (VMRGLH VRRC:$vA, VRRC:$vA)>; +def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef), + (VMRGLW VRRC:$vA, VRRC:$vA)>; +def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef), + (VMRGHB VRRC:$vA, VRRC:$vA)>; +def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef), + (VMRGHH VRRC:$vA, VRRC:$vA)>; +def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), + (VMRGHW VRRC:$vA, VRRC:$vA)>; + +// Logical Operations +def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>; + +def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))), + (VNOR VRRC:$A, VRRC:$B)>; +def : Pat<(v4i32 (and VRRC:$A, 
(vnot_ppc VRRC:$B))), + (VANDC VRRC:$A, VRRC:$B)>; + +def : Pat<(fmul VRRC:$vA, VRRC:$vB), + (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>; + +// Fused multiply add and multiply sub for packed float. These are represented +// separately from the real instructions above, for operations that must have +// the additional precision, such as Newton-Raphson (used by divide, sqrt) +def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C), + (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), + (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; + +def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C), + (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), + (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; + +def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC), + (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>; + +// Vector shifts +def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), + (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>; +def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), + (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>; +def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), + (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>; + +def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), + (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>; +def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), + (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>; +def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), + (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>; + +def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), + (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>; +def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), + (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>; +def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), + (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h new file mode 100644 index 0000000..b424d11 --- /dev/null +++ 
b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h @@ -0,0 +1,43 @@ +//===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to simplify generating frame and constant pool +// references. +// +// For reference, the order of operands for memory references is: +// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate +// Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC_INSTRBUILDER_H +#define POWERPC_INSTRBUILDER_H + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... 
+/// +static inline const MachineInstrBuilder& +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, + bool mem = true) { + if (mem) + return MIB.addImm(Offset).addFrameIndex(FI); + else + return MIB.addFrameIndex(FI).addImm(Offset); +} + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td new file mode 100644 index 0000000..84a15b1 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -0,0 +1,907 @@ +//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// PowerPC instruction formats + +class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> + : Instruction { + field bits<32> Inst; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. 
+ let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; +} + +class PPC970_DGroup_First { bits<1> PPC970_First = 1; } +class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; } +class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; } +class PPC970_MicroCode; + +class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; } +class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; } +class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; } +class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; } +class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; } +class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } +class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } +class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } + + +// 1.7.1 I-Form +class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; +} + +// 1.7.2 B-Form +class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, BrB> { + bits<7> BIBO; // 2 bits of BI and 5 bits of BO. 
+ bits<3> CR; + bits<14> BD; + + bits<5> BI; + let BI{0-1} = BIBO{5-6}; + let BI{2-4} = CR{0-2}; + + let Inst{6-10} = BIBO{4-0}; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + + +// 1.7.4 D-Form +class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> B; + bits<16> C; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + +class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = Addr{20-16}; // Base Reg + let Inst{16-31} = Addr{15-0}; // Displacement +} + +class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<16> C; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + + +class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>; + +class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<16> B; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = 0; + let Inst{16-31} = B; +} + +class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> B; + bits<5> A; + bits<16> C; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + +class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : 
DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let Addr = 0; +} + +class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<16> I; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-31} = I; +} + +class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_5<opcode, OOL, IOL, asmstr, itin> { + let L = PPC64; +} + +class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_5<opcode, OOL, IOL, asmstr, itin>; + +class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_6<opcode, OOL, IOL, asmstr, itin> { + let L = PPC64; +} + + +// 1.7.5 DS-Form +class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<19> DS_RA; + + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-15} = DS_RA{18-14}; // Register # + let Inst{16-29} = DS_RA{13-0}; // Displacement. 
+ let Inst{30-31} = xo; +} + +class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<14> DS; + bits<5> RA; + + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-15} = RA; + let Inst{16-29} = DS; + let Inst{30-31} = xo; +} + +// 1.7.6 X-Form +class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +// This is the same as XForm_base_r3xo, but the first two operands are swapped +// when code is emitted. +class XForm_base_r3xo_swapped + <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> RST; + bits<5> B; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + + +class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>; + +class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { + let Pattern = pattern; +} + +class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>; + +class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, 
itin> { + let Pattern = pattern; +} + +class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { + let B = 0; + let Pattern = pattern; +} + +class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<5> RB; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> { + let L = PPC64; +} + +class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<5> FRA; + bits<5> FRB; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + let Pattern = pattern; + let Inst{6-10} = 31; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + let Pattern = pattern; + let Inst{6-10} = 0; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { +} + +class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, 
string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let A = 0; +} + +class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { +} + +// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of +// numbers presumably relates to some document, but I haven't found it. +class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} +class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + bits<5> FM; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FM; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +// DCB_Form - Form X instruction, used for dcb* instructions. +class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<31, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = immfield; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + + +// DSS_Form - Form X instruction, used for altivec dss* instructions. 
// DSS_Form fixes the primary opcode at 31.  T occupies bit 6, bits 7-8 are
// zero, STRM occupies bits 9-10, and A/B fill the two register slots.
class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
  : I<31, OOL, IOL, asmstr, itin> {
  bits<1> T;
  bits<2> STRM;
  bits<5> A;
  bits<5> B;

  let Pattern = pattern;

  let Inst{6}     = T;
  let Inst{7-8}   = 0;
  let Inst{9-10}  = STRM;
  let Inst{11-15} = A;
  let Inst{16-20} = B;
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// 1.7.7 XL-Form
// XLForm_1 - Three 5-bit fields (CRD, CRA, CRB) followed by a 10-bit extended
// opcode; bit 31 is always zero.
class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> CRD;
  bits<5> CRA;
  bits<5> CRB;

  let Pattern = pattern;

  let Inst{6-10}  = CRD;
  let Inst{11-15} = CRA;
  let Inst{16-20} = CRB;
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// XLForm_1_ext - Same layout as XLForm_1, but the single CRD operand is
// replicated into all three 5-bit slots.
class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> CRD;

  let Pattern = pattern;

  let Inst{6-10}  = CRD;
  let Inst{11-15} = CRD;
  let Inst{16-20} = CRD;
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// XLForm_2 - BO/BI/BH fields plus a 10-bit extended opcode.  The 'lk' template
// argument is written directly into bit 31; bits 16-18 are zero.
class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> BO;
  bits<5> BI;
  bits<2> BH;

  let Pattern = pattern;

  let Inst{6-10}  = BO;
  let Inst{11-15} = BI;
  let Inst{16-18} = 0;
  let Inst{19-20} = BH;
  let Inst{21-30} = xo;
  let Inst{31}    = lk;
}

// XLForm_2_br - XLForm_2 whose BO and BI are assembled from a combined 7-bit
// BIBO operand and a 3-bit CR operand; BH is forced to zero.
class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
                  dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
  : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
  bits<7> BIBO;  // 2 bits of BI and 5 bits of BO.
  bits<3> CR;

  let BO = BIBO{2-6};
  let BI{0-1} = BIBO{0-1};
  let BI{2-4} = CR;
  let BH = 0;
}


// XLForm_2_ext - XLForm_2 with BO and BI supplied as fixed template arguments
// ('bo', 'bi') rather than operands; BH is forced to zero.
class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
                  dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
  : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
  let BO = bo;
  let BI = bi;
  let BH = 0;
}

// XLForm_3 - Two 3-bit fields (BF in bits 6-8, BFA in bits 11-13); every other
// operand bit is zero.  Takes no pattern argument.
class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<3> BF;
  bits<3> BFA;

  let Inst{6-8}   = BF;
  let Inst{9-10}  = 0;
  let Inst{11-13} = BFA;
  let Inst{14-15} = 0;
  let Inst{16-20} = 0;
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// 1.7.8 XFX-Form
// XFXForm_1 - RT plus a 10-bit SPR number.  The SPR field is not stored
// contiguously: SPR{4..0} is written to bits 11-15 and SPR{9..5} to bits
// 16-20, i.e. the two 5-bit halves are swapped in the encoding.
class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5>  RT;
  bits<10> SPR;

  let Inst{6-10}  = RT;
  let Inst{11}    = SPR{4};
  let Inst{12}    = SPR{3};
  let Inst{13}    = SPR{2};
  let Inst{14}    = SPR{1};
  let Inst{15}    = SPR{0};
  let Inst{16}    = SPR{9};
  let Inst{17}    = SPR{8};
  let Inst{18}    = SPR{7};
  let Inst{19}    = SPR{6};
  let Inst{20}    = SPR{5};
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// XFXForm_1_ext - XFXForm_1 with the SPR number fixed by the 'spr' template
// argument.
class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
                   dag OOL, dag IOL, string asmstr, InstrItinClass itin>
  : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
  let SPR = spr;
}

// XFXForm_3 - Only RT is encoded; bits 11-20 are zero.
class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5>  RT;

  let Inst{6-10}  = RT;
  let Inst{11-20} = 0;
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// XFXForm_5 - ST plus an 8-bit FXM mask in bits 12-19; bit 11 is 0.
// Note the field declaration order here is FXM, then ST.
class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin>
  : I<opcode, OOL, IOL, asmstr, itin> {
  bits<8>  FXM;
  bits<5>  ST;

  let Inst{6-10}  = ST;
  let Inst{11}    = 0;
  let Inst{12-19} = FXM;
  let Inst{20}    = 0;
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// XFXForm_5a - Same bit layout as XFXForm_5 except that bit 11 is 1, and the
// fields are declared in the opposite order (ST, then FXM).
class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin>
  : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5>  ST;
  bits<8>  FXM;

  let Inst{6-10}  = ST;
  let Inst{11}    = 1;
  let Inst{12-19} = FXM;
  let Inst{20}    = 0;
  let Inst{21-30} = xo;
  let Inst{31}    = 0;
}

// XFXForm_7 - Alias of XFXForm_1; adds no fields or overrides.
class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin>
  : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;

// XFXForm_7_ext - XFXForm_7 with the SPR number fixed by the 'spr' template
// argument.
class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
                    dag OOL, dag IOL, string asmstr, InstrItinClass itin>
  : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
  let SPR = spr;
}

// XFL-Form - MTFSF
// This is probably 1.7.9, but I don't have the reference that uses this
// numbering scheme...
// The 8-bit FM mask sits in bits 7-14 and RT in bits 16-20; bits 6 and 15 are
// zero.  Unlike the other forms here, this class also takes an operand
// constraint string ('cstr') and installs it as Constraints.
class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                      string cstr, InstrItinClass itin, list<dag>pattern>
  : I<opcode, OOL, IOL, asmstr, itin> {
  bits<8> FM;
  bits<5> RT;

  bit RC = 0;    // set by isDOT
  let Pattern = pattern;
  let Constraints = cstr;

  let Inst{6}     = 0;
  let Inst{7-14}  = FM;
  let Inst{15}    = 0;
  let Inst{16-20} = RT;
  let Inst{21-30} = xo;
  let Inst{31}    = RC;
}

// 1.7.10 XS-Form - SRADI.
// XSForm_1 - RS/A plus a 6-bit shift amount that is split in the encoding:
// SH{4..0} goes to bits 16-20 and SH{5} to bit 30.  The extended opcode is
// only 9 bits wide (bits 21-29).
class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> A;
  bits<5> RS;
  bits<6> SH;

  bit RC = 0;    // set by isDOT
  let Pattern = pattern;

  let Inst{6-10}  = RS;
  let Inst{11-15} = A;
  let Inst{16-20} = SH{4,3,2,1,0};
  let Inst{21-29} = xo;
  let Inst{30}    = SH{5};
  let Inst{31}    = RC;
}

// 1.7.11 XO-Form
// XOForm_1 - Three register fields (RT, RA, RB), the 'oe' template bit in
// bit 21, a 9-bit extended opcode in bits 22-30, and RC in bit 31.
class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> RT;
  bits<5> RA;
  bits<5> RB;

  let Pattern = pattern;

  bit RC = 0;    // set by isDOT

  let Inst{6-10}  = RT;
  let Inst{11-15} = RA;
  let Inst{16-20} = RB;
  let Inst{21}    = oe;
  let Inst{22-30} = xo;
  let Inst{31}    = RC;
}

// XOForm_3 - XOForm_1 with the RB field forced to zero.
class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
               dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
  : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
  let RB = 0;
}

// 1.7.12 A-Form
// AForm_1 - Four register fields and a 5-bit extended opcode.  The fields are
// declared in the order FRT, FRA, FRC, FRB, but encoded with FRB in bits
// 16-20 and FRC in bits 21-25.
class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
         : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> FRT;
  bits<5> FRA;
  bits<5> FRC;
  bits<5> FRB;

  let Pattern = pattern;

  bit RC = 0;    // set by isDOT

  let Inst{6-10}  = FRT;
  let Inst{11-15} = FRA;
  let Inst{16-20} = FRB;
  let Inst{21-25} = FRC;
  let Inst{26-30} = xo;
  let Inst{31}    = RC;
}

// AForm_2 - AForm_1 with FRC forced to zero.
class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
  : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
  let FRC = 0;
}

// AForm_3 - AForm_1 with FRB forced to zero.
class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
  : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
  let FRB = 0;
}

// 1.7.13 M-Form
// MForm_1 - No extended opcode: bits 6-30 hold five 5-bit fields
// (RS, RA, RB, MB, ME) and bit 31 holds RC.  RA is declared before RS even
// though RS is encoded first.
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
    : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> RA;
  bits<5> RS;
  bits<5> RB;
  bits<5> MB;
  bits<5> ME;

  let Pattern = pattern;

  bit RC = 0;    // set by isDOT

  let Inst{6-10}  = RS;
  let Inst{11-15} = RA;
  let Inst{16-20} = RB;
  let Inst{21-25} = MB;
  let Inst{26-30} = ME;
  let Inst{31}    = RC;
}

// MForm_2 - Identical to MForm_1; adds no fields or overrides.
class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
              InstrItinClass itin, list<dag> pattern>
  : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
}

// 1.7.14 MD-Form
// MDForm_1 - RS/RA, a 6-bit shift amount and a 6-bit mask field, with a 3-bit
// extended opcode.  Both 6-bit fields are scrambled in the encoding: SH{4..0}
// goes to bits 16-20 with SH{5} in bit 30, and MBE is written to bits 21-26 as
// MBE{4,3,2,1,0,5} (its top bit placed last).
class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<opcode, OOL, IOL, asmstr, itin> {
  bits<5> RA;
  bits<5> RS;
  bits<6> SH;
  bits<6> MBE;

  let Pattern = pattern;

  bit RC = 0;    // set by isDOT

  let Inst{6-10}  = RS;
  let Inst{11-15} = RA;
  let Inst{16-20} = SH{4,3,2,1,0};
  let Inst{21-26} = MBE{4,3,2,1,0,5};
  let Inst{27-29} = xo;
  let Inst{30}    = SH{5};
  let Inst{31}    = RC;
}



// E-1 VA-Form

// VAForm_1 - DACB ordering.
// Primary opcode is fixed at 4.  The fields are declared VD, VA, VC, VB, while
// the encoding places VB in bits 16-20 and VC in bits 21-25.
class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;
  bits<5> VA;
  bits<5> VC;
  bits<5> VB;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = VA;
  let Inst{16-20} = VB;
  let Inst{21-25} = VC;
  let Inst{26-31} = xo;
}

// VAForm_1a - DABC ordering.
// Same bit layout as the DACB variant (VB in bits 16-20, VC in bits 21-25);
// only the declaration order of VB and VC differs.  Primary opcode is 4.
class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;
  bits<5> VA;
  bits<5> VB;
  bits<5> VC;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = VA;
  let Inst{16-20} = VB;
  let Inst{21-25} = VC;
  let Inst{26-31} = xo;
}

// VAForm_2 - Three vector registers plus a 4-bit SH field in bits 22-25;
// bit 21 is zero.  Primary opcode is 4.
class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;
  bits<5> VA;
  bits<5> VB;
  bits<4> SH;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = VA;
  let Inst{16-20} = VB;
  let Inst{21}    = 0;
  let Inst{22-25} = SH;
  let Inst{26-31} = xo;
}

// E-2 VX-Form
// VXForm_1 - Three vector registers (VD, VA, VB) and an 11-bit extended
// opcode in bits 21-31.  Primary opcode is 4.
class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;
  bits<5> VA;
  bits<5> VB;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = VA;
  let Inst{16-20} = VB;
  let Inst{21-31} = xo;
}

// VXForm_setzero - VXForm_1 with both source fields tied to the destination
// (VA = VB = VD), so the single VD operand appears in all three slots.
class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
                     InstrItinClass itin, list<dag> pattern>
    : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
  let VA = VD;
  let VB = VD;
}


// VXForm_2 - VD and VB only; the middle register slot (bits 11-15) is zero.
class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;
  bits<5> VB;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = 0;
  let Inst{16-20} = VB;
  let Inst{21-31} = xo;
}

// VXForm_3 - VD plus a 5-bit IMM in bits 11-15; bits 16-20 are zero.
class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;
  bits<5> IMM;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = IMM;
  let Inst{16-20} = 0;
  let Inst{21-31} = xo;
}

/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
// Only VD is an operand; bits 11-20 are zero.  Primary opcode is 4.
class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = 0;
  let Inst{16-20} = 0;
  let Inst{21-31} = xo;
}

/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
// Only VB is an operand; bits 6-15 are zero.  Primary opcode is 4.
class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VB;

  let Pattern = pattern;

  let Inst{6-10}  = 0;
  let Inst{11-15} = 0;
  let Inst{16-20} = VB;
  let Inst{21-31} = xo;
}

// E-4 VXR-Form
// VXRForm_1 - Three vector registers, an RC bit in bit 21 (defaults to 0),
// and a 10-bit extended opcode in bits 22-31.  Primary opcode is 4.
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
    : I<4, OOL, IOL, asmstr, itin> {
  bits<5> VD;
  bits<5> VA;
  bits<5> VB;
  bit RC = 0;

  let Pattern = pattern;

  let Inst{6-10}  = VD;
  let Inst{11-15} = VA;
  let Inst{16-20} = VB;
  let Inst{21}    = RC;
  let Inst{22-31} = xo;
}

//===----------------------------------------------------------------------===//
// Pseudo - Instruction class with primary opcode 0, no itinerary, PPC64
// cleared, and all 32 encoding bits set to zero.
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
    : I<0, OOL, IOL, asmstr, NoItinerary> {
  let PPC64 = 0;
  let Pattern = pattern;
  let Inst{31-0} = 0;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp new file mode 100644 index 0000000..2bc109c --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -0,0 +1,655 @@
//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PowerPC implementation of the TargetInstrInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCMachineFunctionInfo.h" +#include "PPCTargetMachine.h" +#include "PPCHazardRecognizers.h" +#include "MCTargetDesc/PPCPredicates.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" + +#define GET_INSTRINFO_CTOR +#include "PPCGenInstrInfo.inc" + +namespace llvm { +extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. +extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. +} + +using namespace llvm; + +PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) + : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), + TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + +/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for +/// this target when scheduling the DAG. +ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Should use subtarget info to pick the right hazard recognizer. For + // now, always return a PPC970 recognizer. 
+ const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new PPCHazardRecognizer970(*TII); +} + +unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case PPC::LD: + case PPC::LWZ: + case PPC::LFS: + case PPC::LFD: + if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && + MI->getOperand(2).isFI()) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case PPC::STD: + case PPC::STW: + case PPC::STFS: + case PPC::STFD: + if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && + MI->getOperand(2).isFI()) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +// commuteInstruction - We can commute rlwimi instructions, but only if the +// rotate amt is zero. We also have to munge the immediates a bit. +MachineInstr * +PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { + MachineFunction &MF = *MI->getParent()->getParent(); + + // Normal instructions can be commuted the obvious way. + if (MI->getOpcode() != PPC::RLWIMI) + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + + // Cannot commute if it has a non-zero rotate count. 
+ if (MI->getOperand(3).getImm() != 0) + return 0; + + // If we have a zero rotate count, we have: + // M = mask(MB,ME) + // Op0 = (Op1 & ~M) | (Op2 & M) + // Change this to: + // M = mask((ME+1)&31, (MB-1)&31) + // Op0 = (Op2 & ~M) | (Op1 & M) + + // Swap op1/op2 + unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); + unsigned Reg2 = MI->getOperand(2).getReg(); + bool Reg1IsKill = MI->getOperand(1).isKill(); + bool Reg2IsKill = MI->getOperand(2).isKill(); + bool ChangeReg0 = false; + // If machine instrs are no longer in two-address forms, update + // destination register as well. + if (Reg0 == Reg1) { + // Must be two address instruction! + assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && + "Expecting a two-address instruction!"); + Reg2IsKill = false; + ChangeReg0 = true; + } + + // Masks. + unsigned MB = MI->getOperand(4).getImm(); + unsigned ME = MI->getOperand(5).getImm(); + + if (NewMI) { + // Create a new instruction. + unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg(); + bool Reg0IsDead = MI->getOperand(0).isDead(); + return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) + .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) + .addReg(Reg2, getKillRegState(Reg2IsKill)) + .addReg(Reg1, getKillRegState(Reg1IsKill)) + .addImm((ME+1) & 31) + .addImm((MB-1) & 31); + } + + if (ChangeReg0) + MI->getOperand(0).setReg(Reg2); + MI->getOperand(2).setReg(Reg1); + MI->getOperand(1).setReg(Reg2); + MI->getOperand(2).setIsKill(Reg1IsKill); + MI->getOperand(1).setIsKill(Reg2IsKill); + + // Swap the mask around. + MI->getOperand(4).setImm((ME+1) & 31); + MI->getOperand(5).setImm((MB-1) & 31); + return MI; +} + +void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL; + BuildMI(MBB, MI, DL, get(PPC::NOP)); +} + + +// Branch analysis. 
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastInst->getOpcode() == PPC::B) { + if (!LastInst->getOperand(0).isMBB()) + return true; + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (LastInst->getOpcode() == PPC::BCC) { + if (!LastInst->getOperand(2).isMBB()) + return true; + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(2).getMBB(); + Cond.push_back(LastInst->getOperand(0)); + Cond.push_back(LastInst->getOperand(1)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && + isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with PPC::B and PPC:BCC, handle it. + if (SecondLastInst->getOpcode() == PPC::BCC && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(2).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(2).getMBB(); + Cond.push_back(SecondLastInst->getOperand(0)); + Cond.push_back(SecondLastInst->getOperand(1)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two PPC:Bs, handle it. 
The second one is not + // executed, so remove it. + if (SecondLastInst->getOpcode() == PPC::B && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } + if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != PPC::BCC) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +unsigned +PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "PPC branch conditions have two components!"); + + // One-way branch. + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch + BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, DL, get(PPC::BCC)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + return 1; + } + + // Two-way Conditional Branch. 
+ BuildMI(&MBB, DL, get(PPC::BCC)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); + return 2; +} + +void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR; + else if (PPC::G8RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR8; + else if (PPC::F4RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::FMR; + else if (PPC::CRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::MCRF; + else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::VOR; + else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::CROR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + const MCInstrDesc &MCID = get(Opc); + if (MCID.getNumOperands() == 3) + BuildMI(MBB, I, DL, MCID, DestReg) + .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); + else + BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); +} + +bool +PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, + unsigned SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ + DebugLoc DL; + if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { + if (SrcReg != PPC::LR) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else { + // FIXME: this spills LR immediately to memory in one step. To do this, + // we use R11, which we know cannot be used in the prolog/epilog. This is + // a hack. 
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(PPC::R11, + getKillRegState(isKill)), + FrameIdx)); + } + } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { + if (SrcReg != PPC::LR8) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else { + // FIXME: this spills LR immediately to memory in one step. To do this, + // we use R11, which we know cannot be used in the prolog/epilog. This is + // a hack. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) + .addReg(PPC::X11, + getKillRegState(isKill)), + FrameIdx)); + } + } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { + if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { + // FIXME (64-bit): Enable + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; + } else { + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + + // We need to store the CR in the low 4-bits of the saved value. First, + // issue a MFCR to save all of the CRBits. 
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg) + .addReg(SrcReg, getKillRegState(isKill))); + + // If the saved register wasn't CR0, shift the bits left so that they are + // in CR0's slot. + if (SrcReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; + // rlwinm scratch, scratch, ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(ShiftBits) + .addImm(0).addImm(31)); + } + + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(ScratchReg, + getKillRegState(isKill)), + FrameIdx)); + } + } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { + // FIXME: We use CRi here because there is no mtcrf on a bit. Since the + // backend currently only uses CR1EQ as an individual bit, this should + // not cause any bug. If we need other uses of CR bits, the following + // code may be invalid. 
+ unsigned Reg = 0; + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) + Reg = PPC::CR0; + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) + Reg = PPC::CR1; + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) + Reg = PPC::CR2; + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) + Reg = PPC::CR3; + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) + Reg = PPC::CR4; + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) + Reg = PPC::CR5; + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) + Reg = PPC::CR6; + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) + Reg = PPC::CR7; + + return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, + PPC::CRRCRegisterClass, NewMIs); + + } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { + // We don't have indexed addressing for vector loads. Emit: + // R0 = ADDI FI# + // STVX VAL, 0, R0 + // + // FIXME: We use R0 here, because it isn't available for RA. 
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + FrameIdx, 0, 0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, getKillRegState(isKill)) + .addReg(PPC::R0) + .addReg(PPC::R0)); + } else { + llvm_unreachable("Unknown regclass!"); + } + + return false; +} + +void +PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineFunction &MF = *MBB.getParent(); + SmallVector<MachineInstr*, 4> NewMIs; + + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setSpillsCR(); + } + + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) + MBB.insert(MI, NewMIs[i]); + + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MachineMemOperand::MOStore, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + NewMIs.back()->addMemOperand(MF, MMO); +} + +void +PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs)const{ + if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { + if (DestReg != PPC::LR) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + DestReg), FrameIdx)); + } else { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + PPC::R11), FrameIdx)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11)); + } + } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { + if (DestReg != PPC::LR8) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), + FrameIdx)); + } else { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), + PPC::R11), 
FrameIdx)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11)); + } + } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), + FrameIdx)); + } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), + FrameIdx)); + } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); + + // If the reloaded register isn't CR0, shift the bits right so that they are + // in the right CR's slot. + if (DestReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + // rlwinm r11, r11, 32-ShiftBits, 0, 31. 
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); + } + + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) + .addReg(ScratchReg)); + } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { + + unsigned Reg = 0; + if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || + DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) + Reg = PPC::CR0; + else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT || + DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN) + Reg = PPC::CR1; + else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT || + DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN) + Reg = PPC::CR2; + else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT || + DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN) + Reg = PPC::CR3; + else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT || + DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN) + Reg = PPC::CR4; + else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT || + DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN) + Reg = PPC::CR5; + else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT || + DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN) + Reg = PPC::CR6; + else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT || + DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) + Reg = PPC::CR7; + + return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, + PPC::CRRCRegisterClass, NewMIs); + + } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { + // We don't have indexed addressing for vector loads. Emit: + // R0 = ADDI FI# + // Dest = LVX 0, R0 + // + // FIXME: We use R0 here, because it isn't available for RA. 
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + FrameIdx, 0, 0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0) + .addReg(PPC::R0)); + } else { + llvm_unreachable("Unknown regclass!"); + } +} + +void +PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineFunction &MF = *MBB.getParent(); + SmallVector<MachineInstr*, 4> NewMIs; + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) + MBB.insert(MI, NewMIs[i]); + + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + NewMIs.back()->addMemOperand(MF, MMO); +} + +MachineInstr* +PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE)); + addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + +bool PPCInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); + // Leave the CR# the same, but invert the condition. + Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm())); + return false; +} + +/// GetInstSize - Return the number of bytes of code the specified +/// instruction may be. This returns the maximum number of bytes. +/// +unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case PPC::INLINEASM: { // Inline Asm: Variable size. 
+ const MachineFunction *MF = MI->getParent()->getParent(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + case PPC::PROLOG_LABEL: + case PPC::EH_LABEL: + case PPC::GC_LABEL: + case PPC::DBG_VALUE: + return 0; + default: + return 4; // PowerPC instructions are all 4 bytes + } +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h new file mode 100644 index 0000000..90bacc9 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -0,0 +1,149 @@ +//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC32_INSTRUCTIONINFO_H +#define POWERPC32_INSTRUCTIONINFO_H + +#include "PPC.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "PPCRegisterInfo.h" + +#define GET_INSTRINFO_HEADER +#include "PPCGenInstrInfo.inc" + +namespace llvm { + +/// PPCII - This namespace holds all of the PowerPC target-specific +/// per-instruction flags. These must match the corresponding definitions in +/// PPC.td and PPCInstrFormats.td. +namespace PPCII { +enum { + // PPC970 Instruction Flags. These flags describe the characteristics of the + // PowerPC 970 (aka G5) dispatch groups and how they are formed out of + // raw machine instructions. + + /// PPC970_First - This instruction starts a new dispatch group, so it will + /// always be the first one in the group. 
+ PPC970_First = 0x1, + + /// PPC970_Single - This instruction starts a new dispatch group and + /// terminates it, so it will be the sole instruction in the group. + PPC970_Single = 0x2, + + /// PPC970_Cracked - This instruction is cracked into two pieces, requiring + /// two dispatch pipes to be available to issue. + PPC970_Cracked = 0x4, + + /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that + /// an instruction is issued to. + PPC970_Shift = 3, + PPC970_Mask = 0x07 << PPC970_Shift +}; +enum PPC970_Unit { + /// These are the various PPC970 execution unit pipelines. Each instruction + /// is one of these. + PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction + PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit + PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit + PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit + PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit + PPC970_VALU = 5 << PPC970_Shift, // Vector ALU + PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit + PPC970_BRU = 7 << PPC970_Shift // Branch Unit +}; +} // end namespace PPCII + + +class PPCInstrInfo : public PPCGenInstrInfo { + PPCTargetMachine &TM; + const PPCRegisterInfo RI; + + bool StoreRegToStackSlot(MachineFunction &MF, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; +public: + explicit PPCInstrInfo(PPCTargetMachine &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). 
+ /// + virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } + + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + // commuteInstruction - We can commute rlwimi instructions, but only if the + // rotate amt is zero. We also have to munge the immediates a bit. + virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const; + + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + + // Branch analysis. + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + /// GetInstSize - Return the number of bytes of code 
the specified + /// instruction may be. This returns the maximum number of bytes. + /// + virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td new file mode 100644 index 0000000..f248b5b --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -0,0 +1,1482 @@ +//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the subset of the 32-bit PowerPC instruction set, as used +// by the PowerPC instruction selector. +// +//===----------------------------------------------------------------------===// + +include "PPCInstrFormats.td" + +//===----------------------------------------------------------------------===// +// PowerPC specific type constraints. 
+//
+
+// Type profiles for the PowerPC-specific SelectionDAG nodes. Each record is
+// referenced by the SDNode of the corresponding name declared in this file
+// (e.g. SDT_PPCstfiwx by PPCstfiwx, SDT_PPCvperm by PPCvperm,
+// SDT_PPCCallSeqStart by callseq_start).
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [       // stfiwx
+  SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+                                         SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+  SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+  SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+// lbrx and stbrx share the same three constraints; only the result/operand
+// counts differ.
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+  SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+  SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+
+// larx and stcx likewise share their constraints.
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+  SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+  SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+  SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+// No results, no operands, no constraints.
+def SDT_PPCnop : SDTypeProfile<0, 0, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfcfid  : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+                       [SDNPHasChain, SDNPMayStore]>;
+
+// This sequence is used for long double->int conversions. It changes the
+// bits in the FPSCR which is not modelled.
+// MFFS, MTFSB0, MTFSB1, FADDRTZ and MTFSF are tied to one another through glue
+// (see the SDNPInGlue / SDNPOutGlue properties on each node) rather than
+// through a chain: none of them is declared with SDNPHasChain.
+def PPCmffs   : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+                       [SDNPOutGlue]>;
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                       [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                       [SDNPInGlue, SDNPOutGlue]>;
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+                       [SDNPInGlue, SDNPOutGlue]>;
+def PPCmtfsf  : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
+                       [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+                        SDTCisVT<3, f64>]>,
+                       [SDNPInGlue]>;
+
+def PPCfsel   : SDNode<"PPCISD::FSEL",
+   // Type constraint for fsel.
+   SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+                        SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi       : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo       : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCvmaddfp  : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCvperm    : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts.  These nodes are generated by the multi-precision shift code.
+def PPCsrl        : SDNode<"PPCISD::SRL"       , SDTIntShiftOp>;
+def PPCsra        : SDNode<"PPCISD::SRA"       , SDTIntShiftOp>;
+def PPCshl        : SDNode<"PPCISD::SHL"       , SDTIntShiftOp>;
+
+def PPCextsw_32   : SDNode<"PPCISD::EXTSW_32"  , SDTIntUnaryOp>;
+def PPCstd_32     : SDNode<"PPCISD::STD_32"    , SDTStore,
+                           [SDNPHasChain, SDNPMayStore]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+                           [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_PPCCallSeqEnd,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Call-related nodes. The Darwin and SVR4 variants are declared with the same
+// type profile and the same node properties; only the PPCISD opcode differs.
+def SDT_PPCCall   : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                             SDNPVariadic]>;
+def PPCcall_SVR4  : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
+def PPCnop        : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCload       : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCload_toc   : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
+                           [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
+                            [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def PPCmtctr      : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCbctrl_Darwin  : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
+                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                               SDNPVariadic]>;
+
+def PPCbctrl_SVR4  : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                             SDNPVariadic]>;
+
+def retflag       : SDNode<"PPCISD::RET_FLAG", SDTNone,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+                          [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
+
+// VCMPo differs from VCMP only in producing glue.
+def PPCvcmp       : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+                           [SDNPHasChain, SDNPOptInGlue]>;
+
+def PPClbrx       : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+                           [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx      : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+                           [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support atomic operations
+def PPClarx      : SDNode<"PPCISD::LARX", SDT_PPClarx,
+                          [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx      : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+                          [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp  : SDTypeProfile<1, 2, []>;
+def PPCdynalloc   : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+  // Transformation function: 31 - imm
+  return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+  // Transformation function: 32 - imm, except that an immediate of 0 maps to
+  // 0 rather than to 32.
+  return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+  // Transformation function: get the low 16 bits.
+  return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+  // Transformation function: shift the immediate value down into the low bits.
+  return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+  // Transformation function: get the "high adjusted" 16 bits. The low 16 bits,
+  // interpreted as a signed short, are subtracted before shifting, so that
+  // (HA16 << 16) plus the sign-extended low half reproduces the 32-bit value.
+  signed int Val = N->getZExtValue();
+  return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+  // Transformation function: get the start bit of a mask.
+  // The result of isRunOfOnes is deliberately ignored; mb is pre-initialised
+  // so a defined value is returned even if the helper leaves it untouched
+  // (presumably when the immediate is not a run of ones - confirm in helper).
+  unsigned mb = 0, me;
+  (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+  return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+  // Transformation function: get the end bit of a mask.
+  // Mirrors MB: me is pre-initialised and the helper's result is ignored.
+  unsigned mb, me = 0;
+  (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+  return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+  // maskImm predicate - True if immediate is a run of ones.
+  // Only i32 immediates can match; any other value type is rejected.
+  unsigned mb, me;
+  if (N->getValueType(0) == MVT::i32)
+    return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+  else
+    return false;
+}]>;
+
+def immSExt16  : PatLeaf<(imm), [{
+  // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+  // field.  Used by instructions like 'addi'.
+  // The comparison width follows the node's value type: 32 bits for i32,
+  // 64 bits otherwise.
+  if (N->getValueType(0) == MVT::i32)
+    return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+  else
+    return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16  : PatLeaf<(imm), [{
+  // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+  // field.  Used by instructions like 'ori'.
+  return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero.  There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt.  These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+  // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+  // immediate are set.  Used by instructions like 'xoris'.
+  return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+  // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+  // immediate are set.  Used by instructions like 'addis'.  Identical to
+  // imm16ShiftedZExt in 32-bit mode.
+  if (N->getZExtValue() & 0xFFFF) return false;
+  if (N->getValueType(0) == MVT::i32)
+    return true;
+  // For 64-bit, make sure it is sext right.
+  return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+// Mixin classes that set a single field on the record that inherits them.
+class isPPC64 { bit PPC64 = 1; }
+// isDOT sets RC = 1 and lists CR0 in Defs.
+class isDOT   {
+  list<Register> Defs = [CR0];
+  bit RC  = 1;
+}
+
+class RegConstraint<string C> {
+  string Constraints = C;
+}
+class NoEncode<string E> {
+  string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+// Immediate operands. Each names the printer method used to emit it.
+def s5imm   : Operand<i32> {
+  let PrintMethod = "printS5ImmOperand";
+}
+def u5imm   : Operand<i32> {
+  let PrintMethod = "printU5ImmOperand";
+}
+def u6imm   : Operand<i32> {
+  let PrintMethod = "printU6ImmOperand";
+}
+def s16imm  : Operand<i32> {
+  let PrintMethod = "printS16ImmOperand";
+}
+def u16imm  : Operand<i32> {
+  let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> {   // Multiply imm by 4 before printing.
+  let PrintMethod = "printS16X4ImmOperand";
+}
+// Branch and call targets. directbrtarget and calltarget share an encoder;
+// calltarget declares no PrintMethod of its own.
+def directbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getDirectBrEncoding";
+}
+def condbrtarget : Operand<OtherVT> {
+  let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getCondBrEncoding";
+}
+def calltarget : Operand<iPTR> {
+  let EncoderMethod = "getDirectBrEncoding";
+}
+def aaddr : Operand<iPTR> {
+  let PrintMethod = "printAbsAddrOperand";
+}
+def symbolHi: Operand<i32> {
+  let PrintMethod = "printSymbolHi";
+  let EncoderMethod = "getHA16Encoding";
+}
+def symbolLo: Operand<i32> {
+  let PrintMethod = "printSymbolLo";
+  let EncoderMethod = "getLO16Encoding";
+}
+def crbitm: Operand<i8> {
+  let PrintMethod = "printcrbitm";
+  let EncoderMethod = "get_crbitm_encoding";
+}
+// Address operands
+// memri and memrix are (immediate, register) pairs; memrr is a pair of
+// registers and declares no EncoderMethod.
+def memri : Operand<iPTR> {
+  let PrintMethod = "printMemRegImm";
+  let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+  let EncoderMethod = "getMemRIEncoding";
+}
+def memrr : Operand<iPTR> {
+  let PrintMethod = "printMemRegReg";
+  let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> {   // memri where the imm is shifted 2 bits.
+  let PrintMethod = "printMemRegImmShifted";
+  let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+  let EncoderMethod = "getMemRIXEncoding";
+}
+def tocentry : Operand<iPTR> {
+  let MIOperandInfo = (ops i32imm:$imm);
+}
+
+// PowerPC Predicate operand.  20 = (0<<5)|20 = always, CR0 is a dummy reg
+// that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
+                                     (ops (i32 20), (i32 zero_reg))> {
+  let PrintMethod = "printPredicateOperand";
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr  : ComplexPattern<iPTR, 2, "SelectAddrImm",    [], []>;
+def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",    [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+// The call-stack adjustment pseudos both define and use R1.
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
+                              [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
+                              [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE    : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+                              "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
+                       [(set GPRC:$result,
+                             (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
+// instruction selection into a branch sequence.
+// One pseudo per register class; none carries a selection pattern.
+let usesCustomInserter = 1,    // Expanded after instruction selection.
+    PPC970_Single = 1 in {
+  def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+                              i32imm:$BROPC), "",
+                              []>;
+  def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+                              i32imm:$BROPC), "",
+                              []>;
+  def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+                              i32imm:$BROPC), "",
+                              []>;
+  def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+                              i32imm:$BROPC), "",
+                              []>;
+  def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+                              i32imm:$BROPC), "",
+                              []>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
+def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+                     "", []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+  let isReturn = 1, Uses = [LR, RM] in
+    def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
+                          "b${p:cc}lr ${p:reg}", BrB,
+                          [(retflag)]>;
+  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+    def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+}
+
+let Defs = [LR] in
+  def MovePCtoLR : Pseudo<(outs), (ins), "", []>,
+                   PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+  let isBarrier = 1 in {
+  def B   : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+                  "b $dst", BrB,
+                  [(br bb:$dst)]>;
+  }
+
+  // BCC represents an arbitrary conditional branch on a predicate.
+  // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+  // a two-value operand where a dag node expects two operands. :(
+  def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+                  "b${cond:cc} ${cond:reg}, $dst"
+                  /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+}
+
+// Darwin ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+  // All calls clobber the non-callee saved registers...
+  Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+          F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+          V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+          LR,CTR,
+          CR0,CR1,CR5,CR6,CR7,CARRY] in {
+  // Convenient aliases for call instructions
+  let Uses = [RM] in {
+    def BL_Darwin  : IForm<18, 0, 1,
+                           (outs), (ins calltarget:$func, variable_ops),
+                           "bl $func", BrB, []>;  // See Pat patterns below.
+    def BLA_Darwin : IForm<18, 1, 1,
+                          (outs), (ins aaddr:$func, variable_ops),
+                          "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
+  }
+  let Uses = [CTR, RM] in {
+    def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
+                                  (outs), (ins variable_ops),
+                                  "bctrl", BrB,
+                                  [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+  }
+}
+
+// SVR4 ABI Calls.
+// The clobber list matches the Darwin one above except that R2 is not listed.
+let isCall = 1, PPC970_Unit = 7,
+  // All calls clobber the non-callee saved registers...
+  Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+          F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+          V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+          LR,CTR,
+          CR0,CR1,CR5,CR6,CR7,CARRY] in {
+  // Convenient aliases for call instructions
+  let Uses = [RM] in {
+    def BL_SVR4  : IForm<18, 0, 1,
+                         (outs), (ins calltarget:$func, variable_ops),
+                         "bl $func", BrB, []>;  // See Pat patterns below.
+    def BLA_SVR4 : IForm<18, 1, 1,
+                        (outs), (ins aaddr:$func, variable_ops),
+                        "bla $func", BrB,
+                        [(PPCcall_SVR4 (i32 imm:$func))]>;
+  }
+  let Uses = [CTR, RM] in {
+    def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
+                                (outs), (ins variable_ops),
+                                "bctrl", BrB,
+                                [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+  }
+}
+
+
+// Tail-call return pseudos. Only the absolute-address form (TCRETURNai)
+// carries a selection pattern; the other two have empty pattern lists.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+                        (ins calltarget:$dst, i32imm:$offset, variable_ops),
+                 "#TC_RETURNd $dst $offset",
+                 []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+                 "#TC_RETURNa $func $offset",
+                 [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+                 "#TC_RETURNr $dst $offset",
+                 []>;
+
+
+// Real tail-call branches. Each is flagged as a call, a return and a
+// terminator at the same time.
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+    isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM]  in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+     Requires<[In32BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+    isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB   : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+                  "b $dst", BrB,
+                  []>;
+
+
+// NOTE(review): TAILBA prints "ba" but passes the same IForm<18, 0, 0>
+// arguments as TAILB, whereas the BLA_* records above pass 1 as the second
+// argument where the BL_* records pass 0. Confirm against the IForm definition
+// in PPCInstrFormats.td whether the second argument should be 1 here.
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+    isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA   : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+                  "ba $dst", BrB,
+                  []>;
+
+
+// DCB* instructions.
+// Each DCB* record maps the int_ppc_<mnemonic> intrinsic onto the instruction
+// of the same name, addressed through xoaddr. DCBZ and DCBZL share the first
+// DCB_Form argument (1014) and differ only in the second.
+def DCBA   : DCB_Form<758, 0, (outs), (ins memrr:$dst),
+                      "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+def DCBF   : DCB_Form<86, 0, (outs), (ins memrr:$dst),
+                      "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+def DCBI   : DCB_Form<470, 0, (outs), (ins memrr:$dst),
+                      "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+def DCBST  : DCB_Form<54, 0, (outs), (ins memrr:$dst),
+                      "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+def DCBT   : DCB_Form<278, 0, (outs), (ins memrr:$dst),
+                      "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
+                      "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+def DCBZ   : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
+                      "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+def DCBZL  : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
+                      "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+                      PPC970_DGroup_Single;
+
+// Atomic operations
+// Pseudos with no assembly string, expanded through the custom inserter.
+// There is one record per operation and access width (I8 / I16 / I32), and
+// every one of them lists CR0 in Defs.
+let usesCustomInserter = 1 in {
+  let Defs = [CR0] in {
+    def ATOMIC_LOAD_ADD_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_SUB_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_AND_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_OR_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_XOR_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_NAND_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_ADD_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_SUB_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_AND_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_OR_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_XOR_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_NAND_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_ADD_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_SUB_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_AND_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_OR_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_XOR_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+    def ATOMIC_LOAD_NAND_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
+      [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+
+    def ATOMIC_CMP_SWAP_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+      [(set GPRC:$dst,
+            (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+    def ATOMIC_CMP_SWAP_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+      [(set GPRC:$dst,
+            (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+    def ATOMIC_CMP_SWAP_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
+      [(set GPRC:$dst,
+            (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+
+    def ATOMIC_SWAP_I8 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+      [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+    def ATOMIC_SWAP_I16 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+      [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+    def ATOMIC_SWAP_I32 : Pseudo<
+      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
+      [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+  }
+}
+
+// Instructions to support atomic operations
+// LWARX selects the PPClarx node and STWCX the PPCstcx node.
+def LWARX : XForm_1<31,  20, (outs GPRC:$rD), (ins memrr:$src),
+                   "lwarx $rD, $src", LdStLWARX,
+                   [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+                   "stwcx. $rS, $dst", LdStSTWCX,
+                   [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+                   isDOT;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+def TRAP  : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+// Loads that use a register + 16-bit immediate (memri / iaddr) address.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+                  "lbz $rD, $src", LdStGeneral,
+                  [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+                  "lha $rD, $src", LdStLHA,
+                  [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+                  PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+                  "lhz $rD, $src", LdStGeneral,
+                  [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+                  "lwz $rD, $src", LdStGeneral,
+                  [(set GPRC:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+                  "lfs $rD, $src", LdStLFDU,
+                  [(set F4RC:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+                  "lfd $rD, $src", LdStLFD,
+                  [(set F8RC:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+// Each def has a second result, $ea_result, tied to the base register of
+// $addr and excluded from the encoding.  No selection patterns are attached.
+let mayLoad = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+                   "lbzu $rD, $addr", LdStGeneral,
+                   []>, RegConstraint<"$addr.reg = $ea_result">,
+                   NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+                   "lhau $rD, $addr", LdStGeneral,
+                   []>, RegConstraint<"$addr.reg = $ea_result">,
+                   NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+                   "lhzu $rD, $addr", LdStGeneral,
+                   []>, RegConstraint<"$addr.reg = $ea_result">,
+                   NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+                   "lwzu $rD, $addr", LdStGeneral,
+                   []>, RegConstraint<"$addr.reg = $ea_result">,
+                   NoEncode<"$ea_result">;
+
+// The two floating-point update forms must print the "u" mnemonic.  Printing
+// plain lfs/lfd makes the assembler emit the non-updating load, so the
+// write-back to the base register expressed by $ea_result would be lost.
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+                   "lfsu $rD, $addr", LdStLFDU,
+                   []>, RegConstraint<"$addr.reg = $ea_result">,
+                   NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+                   "lfdu $rD, $addr", LdStLFD,
+                   []>, RegConstraint<"$addr.reg = $ea_result">,
+                   NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+                   "lbzx $rD, $src", LdStGeneral,
+                   [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+                   "lhax $rD, $src", LdStLHA,
+                   [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+                   PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+                   "lhzx $rD, $src", LdStGeneral,
+                   [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+                   "lwzx $rD, $src", LdStGeneral,
+                   [(set GPRC:$rD, (load xaddr:$src))]>;
+
+
+// Byte-reversed loads, selected from PPClbrx with the access width as the
+// second operand.
+def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+                    "lhbrx $rD, $src", LdStGeneral,
+                    [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+                    "lwbrx $rD, $src", LdStGeneral,
+                    [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+                    "lfsx $frD, $src", LdStLFDU,
+                    [(set F4RC:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+                    "lfdx $frD, $src", LdStLFDU,
+                    [(set F8RC:$frD, (load xaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+// Stores that use a register + 16-bit immediate (memri / iaddr) address.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+                  "stb $rS, $src", LdStGeneral,
+                  [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+                  "sth $rS, $src", LdStGeneral,
+                  [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+                  "stw $rS, $src", LdStGeneral,
+                  [(store GPRC:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+                   "stfs $rS, $dst", LdStUX,
+                   [(store F4RC:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+                   "stfd $rS, $dst", LdStUX,
+                   [(store F8RC:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+// The result $ea_res is tied to the base register $ptrreg and is excluded
+// from the encoding.
+let PPC970_Unit = 2 in {
+def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+                    symbolLo:$ptroff, ptr_rc:$ptrreg),
+                    "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+                    [(set ptr_rc:$ea_res,
+                          (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
+                                         iaddroff:$ptroff))]>,
+                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+                    symbolLo:$ptroff, ptr_rc:$ptrreg),
+                    "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+                    [(set ptr_rc:$ea_res,
+                          (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
+                                          iaddroff:$ptroff))]>,
+                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+                    symbolLo:$ptroff, ptr_rc:$ptrreg),
+                    "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+                    [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
+                                                     iaddroff:$ptroff))]>,
+                    RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+// Primary opcodes for the floating-point update stores are 53 (stfsu) and
+// 55 (stfdu).  Opcode 37 is stwu, so reusing it here would encode an integer
+// store of the wrong register file.
+def STFSU : DForm_1a<53, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+                     symbolLo:$ptroff, ptr_rc:$ptrreg),
+                     "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+                     [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
+                                                      iaddroff:$ptroff))]>,
+                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1a<55, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+                     symbolLo:$ptroff, ptr_rc:$ptrreg),
+                     "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+                     [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
+                                                      iaddroff:$ptroff))]>,
+                     RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+
+// Indexed (r+r) Stores.
+//
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+                   "stbx $rS, $dst", LdStGeneral,
+                   [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+                   PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+                   "sthx $rS, $dst", LdStGeneral,
+                   [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+                   PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+                   "stwx $rS, $dst", LdStGeneral,
+                   [(store GPRC:$rS, xaddr:$dst)]>,
+                   PPC970_DGroup_Cracked;
+
+// STWUX has no selection pattern, so mayStore is set explicitly.
+let mayStore = 1 in {
+def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
+                    "stwux $rS, $rA, $rB", LdStGeneral,
+                    []>;
+}
+// Byte-reversed stores, selected from PPCstbrx with the access width as the
+// last operand.
+def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+                    "sthbrx $rS, $dst", LdStGeneral,
+                    [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+                    PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+                    "stwbrx $rS, $dst", LdStGeneral,
+                    [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+                    PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+                     "stfiwx $frS, $dst", LdStUX,
+                     [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+                     "stfsx $frS, $dst", LdStUX,
+                     [(store F4RC:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+                     "stfdx $frS, $dst", LdStUX,
+                     [(store F8RC:$frS, xaddr:$dst)]>;
+}
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins),
+                         "sync", LdStSync,
+                         [(int_ppc_sync)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// D-form arithmetic with a signed 16-bit immediate.  The defs wrapped in
+// "let Defs = [CARRY]" additionally write the carry bit.
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+                   "addi $rD, $rA, $imm", IntGeneral,
+                   [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+                    "addic $rD, $rA, $imm", IntGeneral,
+                    [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+                    PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+                     "addic. $rD, $rA, $imm", IntGeneral,
+                     []>;
+}
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+                    "addis $rD, $rA, $imm", IntGeneral,
+                    [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
+// LA shares primary opcode 14 with ADDI; it is selected when the addend is
+// the low half (PPClo) of a global address.
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+                 "la $rD, $sym($rA)", IntGeneral,
+                 [(set GPRC:$rD, (add GPRC:$rA,
+                                      (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+                    "mulli $rD, $rA, $imm", IntMulLI,
+                    [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+let Defs = [CARRY] in {
+// Note the operand order in the pattern: the result is $imm - $rA.
+def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+                     "subfic $rD, $rA, $imm", IntGeneral,
+                     [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+}
+
+// Immediate loads; same primary opcodes as ADDI/ADDIS (14/15) but with no
+// register source operand.
+let isReMaterializable = 1 in {
+  def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+                      "li $rD, $imm", IntGeneral,
+                      [(set GPRC:$rD, immSExt16:$imm)]>;
+  def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+                       "lis $rD, $imm", IntGeneral,
+                       [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+// D-form logical operations with an unsigned 16-bit immediate, plus the
+// immediate compares.
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                    "andi. $dst, $src1, $src2", IntGeneral,
+                    [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+                    isDOT;
+def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                     "andis. $dst, $src1, $src2", IntGeneral,
+                     [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+                     isDOT;
+def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                  "ori $dst, $src1, $src2", IntGeneral,
+                  [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                   "oris $dst, $src1, $src2", IntGeneral,
+                   [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                   "xori $dst, $src1, $src2", IntGeneral,
+                   [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                    "xoris $dst, $src1, $src2", IntGeneral,
+                    [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+// NOP uses the same primary opcode as ORI (24) with no operands.
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+                       []>;
+def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+                        "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+                         "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// X-form register/register logical operations and variable shifts.  The
+// destination is $rA; $rS and $rB are the sources.
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                   "nand $rA, $rS, $rB", IntGeneral,
+                   [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                  "and $rA, $rS, $rB", IntGeneral,
+                  [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                   "andc $rA, $rS, $rB", IntGeneral,
+                   [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                 "or $rA, $rS, $rB", IntGeneral,
+                 [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                  "nor $rA, $rS, $rB", IntGeneral,
+                  [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                  "orc $rA, $rS, $rB", IntGeneral,
+                  [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                  "eqv $rA, $rS, $rB", IntGeneral,
+                  [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                  "xor $rA, $rS, $rB", IntGeneral,
+                  [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+// The variable shifts are selected from the target-specific PPCshl / PPCsrl /
+// PPCsra nodes rather than the generic shl / srl / sra.
+def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                  "slw $rA, $rS, $rB", IntGeneral,
+                  [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                  "srw $rA, $rS, $rB", IntGeneral,
+                  [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+                   "sraw $rA, $rS, $rB", IntShift,
+                   [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// Arithmetic shift right by a 5-bit immediate; also writes CARRY.
+let Defs = [CARRY] in {
+def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
+                     "srawi $rA, $rS, $SH", IntShift,
+                     [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+}
+def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
+                      "cntlzw $rA, $rS", IntGeneral,
+                      [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+                     "extsb $rA, $rS", IntGeneral,
+                     [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+                     "extsh $rA, $rS", IntGeneral,
+                     [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+                        "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+                         "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+//                      "fcmpo $crD, $fA, $fB", FPCompare>;
+// FCMPUS and FCMPUD share one encoding and mnemonic; they differ only in the
+// register class of their operands (F4RC vs. F8RC).
+def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+                      "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+                      "fcmpu $crD, $fA, $fB", FPCompare>;
+
+// These instructions are modelled as reading the RM register.
+let Uses = [RM] in {
+  def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "fctiwz $frD, $frB", FPGeneral,
+                        [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+  def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+                      "frsp $frD, $frB", FPGeneral,
+                      [(set F4RC:$frD, (fround F8RC:$frB))]>;
+  def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+                       "fsqrt $frD, $frB", FPSqrt,
+                       [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+  def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "fsqrts $frD, $frB", FPSqrt,
+                        [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+  }
+}
+
+/// Note that FMR is defined as pseudo-ops on the PPC970 because they are
+/// often coalesced away and we don't want the dispatch group builder to think
+/// that they will fill slots (which could cause the load of a LSU reject to
+/// sneak into a d-group with a store).
+def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+                   "fmr $frD, $frB", FPGeneral,
+                   []>, // (set F4RC:$frD, F4RC:$frB)
+                   PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+// Each S/D pair below has the same encoding and mnemonic.
+def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+                     "fabs $frD, $frB", FPGeneral,
+                     [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+                     "fabs $frD, $frB", FPGeneral,
+                     [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+                      "fnabs $frD, $frB", FPGeneral,
+                      [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+                      "fnabs $frD, $frB", FPGeneral,
+                      [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+                     "fneg $frD, $frB", FPGeneral,
+                     [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+                     "fneg $frD, $frB", FPGeneral,
+                     [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+}
+
+
+// XL-Form instructions. condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+                    "mcrf $BF, $BFA", BrMCR>,
+                    PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
+                     (ins CRBITRC:$CRA, CRBITRC:$CRB),
+                     "creqv $CRD, $CRA, $CRB", BrCR,
+                     []>;
+
+def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
+                    (ins CRBITRC:$CRA, CRBITRC:$CRB),
+                    "cror $CRD, $CRA, $CRB", BrCR,
+                    []>;
+
+// CRSET / CRUNSET set or clear a single CR bit by applying creqv / crxor to
+// the bit with itself, so they take no input operands.
+def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+                         "creqv $dst, $dst, $dst", BrCR,
+                         []>;
+
+def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
+                          "crxor $dst, $dst, $dst", BrCR,
+                          []>;
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+// NOTE(review): the third template argument of the *_ext forms below looks
+// like the SPR number (9 with CTR, 8 with LR, 256 with VRSAVE) -- confirm
+// against the XFXForm_*_ext class definitions.
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+                          "mfctr $rT", SprMFSPR>,
+                          PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+                          "mtctr $rS", SprMTSPR>,
+                          PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+                         "mtlr $rS", SprMTSPR>,
+                         PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+                         "mflr $rT", SprMFSPR>,
+                         PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+                             "mtspr 256, $rS", IntGeneral>,
+                             PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+                             "mfspr $rT, 256", IntGeneral>,
+                             PPC970_DGroup_First, PPC970_Unit_FXU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+                      "mtcrf $FXM, $rS", BrMCRX>,
+                      PPC970_MicroCode, PPC970_Unit_CRU;
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
+//  vreg = MCRF CR0
+//  MFCR <kill of whatever preg got assigned to vreg>
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it. Represent this in the
+// instruction to keep the register allocator from becoming confused.
+//
+// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+                         "", SprMFCR>,
+                         PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+                     "mfcr $rT", SprMFCR>,
+                     PPC970_MicroCode, PPC970_Unit_CRU;
+
+// NOTE(review): this prints as a two-operand "mfcr"; some assemblers spell
+// the single-field form "mfocrf" -- confirm which the target assembler
+// expects before changing the string.
+def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+                       "mfcr $rT, $FXM", SprMFCR>,
+                       PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// Instructions to manipulate FPSCR. Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+let Uses = [RM], Defs = [RM] in {
+  def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+                        "mtfsb0 $FM", IntMTFSB0,
+                        [(PPCmtfsb0 (i32 imm:$FM))]>,
+                        PPC970_DGroup_Single, PPC970_Unit_FPU;
+  def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+                        "mtfsb1 $FM", IntMTFSB0,
+                        [(PPCmtfsb1 (i32 imm:$FM))]>,
+                        PPC970_DGroup_Single, PPC970_Unit_FPU;
+  // MTFSF does not actually produce an FP result. We pretend it copies
+  // input reg B to the output. If we didn't do this it would look like the
+  // instruction had no outputs (because we aren't modelling the FPSCR) and
+  // it would be deleted.
+  def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
+                      (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+                      "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
+                      [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
+                                                 F8RC:$rT, F8RC:$FRB))]>,
+                      PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+  def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+                      "mffs $rT", IntMFFS,
+                      [(set F8RC:$rT, (PPCmffs))]>,
+                      PPC970_DGroup_Single, PPC970_Unit_FPU;
+  // Same encoding and mnemonic as FADD, but selected from PPCfaddrtz and
+  // kept in its own single-instruction dispatch group.
+  def FADDrtz: AForm_2<63, 21,
+                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                       "fadd $FRT, $FRA, $FRB", FPGeneral,
+                       [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+                       PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set overflow bit
+//
+def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                    "add $rT, $rA, $rB", IntGeneral,
+                    [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+let Defs = [CARRY] in {
+def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                    "addc $rT, $rA, $rB", IntGeneral,
+                    [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+                    PPC970_DGroup_Cracked;
+}
+def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                    "divw $rT, $rA, $rB", IntDivW,
+                    [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+                    PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                     "divwu $rT, $rA, $rB", IntDivW,
+                     [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                     "mulhw $rT, $rA, $rB", IntMulHW,
+                     [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                      "mulhwu $rT, $rA, $rB", IntMulHWU,
+                      [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                     "mullw $rT, $rA, $rB", IntMulHW,
+                     [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+// "Subtract from": the patterns for the subf family compute $rB - $rA.
+def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                    "subf $rT, $rA, $rB", IntGeneral,
+                    [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+let Defs = [CARRY] in {
+def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                     "subfc $rT, $rA, $rB", IntGeneral,
+                     [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+                     PPC970_DGroup_Cracked;
+}
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                   "neg $rT, $rA", IntGeneral,
+                   [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+// Extended (carry-in, carry-out) arithmetic: these both read and write CARRY.
+let Uses = [CARRY], Defs = [CARRY] in {
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                    "adde $rT, $rA, $rB", IntGeneral,
+                    [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                     "addme $rT, $rA", IntGeneral,
+                     [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                     "addze $rT, $rA", IntGeneral,
+                     [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+                     "subfe $rT, $rA, $rB", IntGeneral,
+                     [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                      "subfme $rT, $rA", IntGeneral,
+                      [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+                      "subfze $rT, $rA", IntGeneral,
+                      [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+// Fused multiply-add family.  Operand order is (FRA * FRC) +/- FRB, and every
+// pattern is guarded by the FPContractions predicate.
+let Uses = [RM] in {
+  def FMADD : AForm_1<63, 29,
+                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+                      "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+                      [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+                                             F8RC:$FRB))]>,
+                      Requires<[FPContractions]>;
+  def FMADDS : AForm_1<59, 29,
+                       (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                       "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+                       [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+                                              F4RC:$FRB))]>,
+                       Requires<[FPContractions]>;
+  def FMSUB : AForm_1<63, 28,
+                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+                      "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+                      [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+                                             F8RC:$FRB))]>,
+                      Requires<[FPContractions]>;
+  def FMSUBS : AForm_1<59, 28,
+                       (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                       "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+                       [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+                                              F4RC:$FRB))]>,
+                       Requires<[FPContractions]>;
+  def FNMADD : AForm_1<63, 31,
+                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+                       "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+                       [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+                                                    F8RC:$FRB)))]>,
+                       Requires<[FPContractions]>;
+  def FNMADDS : AForm_1<59, 31,
+                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                        "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+                        [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+                                                     F4RC:$FRB)))]>,
+                        Requires<[FPContractions]>;
+  def FNMSUB : AForm_1<63, 30,
+                       (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+                       "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+                       [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+                                                    F8RC:$FRB)))]>,
+                       Requires<[FPContractions]>;
+  def FNMSUBS : AForm_1<59, 30,
+                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                        "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+                        [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+                                                     F4RC:$FRB)))]>,
+                        Requires<[FPContractions]>;
+}
+// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
+// having 4 of these, force the comparison to always be an 8-byte double (code
+// should use an FMRSD if the input comparison value really wants to be a float)
+// and 4/8 byte forms for the result and operand type.
+// In both FSEL forms the compared operand $FRA is F8RC; only the selected
+// operands and the result differ in register class.
+def FSELD : AForm_1<63, 23,
+                    (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+                    "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+                    [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+                    (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                    "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+                    [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+// Two-operand FP arithmetic.  Primary opcode 63 is used for the F8RC forms
+// and 59 for the F4RC forms.
+let Uses = [RM] in {
+  def FADD : AForm_2<63, 21,
+                     (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                     "fadd $FRT, $FRA, $FRB", FPGeneral,
+                     [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+  def FADDS : AForm_2<59, 21,
+                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+                      "fadds $FRT, $FRA, $FRB", FPGeneral,
+                      [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+  def FDIV : AForm_2<63, 18,
+                     (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                     "fdiv $FRT, $FRA, $FRB", FPDivD,
+                     [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+  def FDIVS : AForm_2<59, 18,
+                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+                      "fdivs $FRT, $FRA, $FRB", FPDivS,
+                      [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+  def FMUL : AForm_3<63, 25,
+                     (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                     "fmul $FRT, $FRA, $FRB", FPFused,
+                     [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+  def FMULS : AForm_3<59, 25,
+                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+                      "fmuls $FRT, $FRA, $FRB", FPGeneral,
+                      [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+  def FSUB : AForm_2<63, 20,
+                     (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                     "fsub $FRT, $FRA, $FRB", FPGeneral,
+                     [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+  def FSUBS : AForm_2<59, 20,
+                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+                      "fsubs $FRT, $FRA, $FRB", FPGeneral,
+                      [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+  }
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+// $rSi is the incoming value of the destination; it is tied to $rA and is
+// excluded from the encoding.
+def RLWIMI : MForm_2<20,
+                     (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+                     u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+                     []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+                     NoEncode<"$rSi">;
+}
+// The rotate-and-mask defs below carry no patterns of their own.
+def RLWINM : MForm_2<21,
+                     (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+                     "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+                     []>;
+def RLWINMo : MForm_2<21,
+                      (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+                      "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+                      []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+                    (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+                    "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+                    []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+          (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not GPRC:$in),
+              (NOR GPRC:$in, GPRC:$in)>;
+
+// ADD an arbitrary immediate.
+// ADDI sign-extends its operand, so the high half uses HA16 rather than
+// HI16 here (NOTE(review): HA16 is presumably the carry-adjusted high half --
+// confirm against its definition).
+def : Pat<(add GPRC:$in, imm:$imm),
+          (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+// OR an arbitrary immediate.
+def : Pat<(or GPRC:$in, imm:$imm),
+          (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor GPRC:$in, imm:$imm),
+          (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+// Matches (imm - reg); SUBFIC takes the register first and the immediate
+// second.
+def : Pat<(sub immSExt16:$imm, GPRC:$in),
+          (SUBFIC GPRC:$in, imm:$imm)>;
+
+// SHL/SRL
+// Immediate shifts are expressed as RLWINM (rotate left, then mask) with the
+// operands (SH, MB, ME) computed by the SHL32 / SRL32 transforms.
+def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
+          (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
+          (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+
+// ROTL
+// A plain rotate keeps all 32 bits, i.e. mask begin 0 and mask end 31.
+def : Pat<(rotl GPRC:$in, GPRC:$sh),
+          (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
+def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
+          (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+
+// RLWNM
+// A variable rotate followed by an AND with a mask immediate folds into one
+// RLWNM whose MB/ME are derived from the mask.
+def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
+          (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+
+// Calls
+def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
+          (BL_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
+          (BL_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
+          (BL_SVR4 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
+          (BL_SVR4 texternalsym:$dst)>;
+
+
+// Tail calls: direct targets select TCRETURNdi, a target held in a CTRRC
+// register selects TCRETURNri.
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+          (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+          (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+          (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+// PPChi selects LIS and PPClo selects LI for each kind of target address
+// (global, constant pool, jump table, block address).
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+// Adding the high half of an address to a register folds into one ADDIS.
+def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
+          (ADDIS GPRC:$in, tglobaladdr:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
+          (ADDIS GPRC:$in, tconstpool:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
+          (ADDIS GPRC:$in, tjumptable:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
+          (ADDIS GPRC:$in, tblockaddress:$g)>;
+
+// Fused negative multiply subtract, alternate pattern
+// B - (A * C) is the negation of (A * C) - B, so it maps onto FNMSUB/FNMSUBS.
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+          (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+          Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+          (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+          Requires<[FPContractions]>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra GPRC:$rS, GPRC:$rB), + (SRAW GPRC:$rS, GPRC:$rB)>; +def : Pat<(srl GPRC:$rS, GPRC:$rB), + (SRW GPRC:$rS, GPRC:$rB)>; +def : Pat<(shl GPRC:$rS, GPRC:$rB), + (SLW GPRC:$rS, GPRC:$rB)>; + +def : Pat<(zextloadi1 iaddr:$src), + (LBZ iaddr:$src)>; +def : Pat<(zextloadi1 xaddr:$src), + (LBZX xaddr:$src)>; +def : Pat<(extloadi1 iaddr:$src), + (LBZ iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), + (LBZX xaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), + (LBZ iaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), + (LBZX xaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), + (LHZ iaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), + (LHZX xaddr:$src)>; +def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>; +def : Pat<(f64 (extloadf32 xaddr:$src)), + (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; + +def : Pat<(f64 (fextend F4RC:$src)), + (COPY_TO_REGCLASS F4RC:$src, F8RC)>; + +// Memory barriers +def : Pat<(membarrier (i32 imm /*ll*/), + (i32 imm /*ls*/), + (i32 imm /*sl*/), + (i32 imm /*ss*/), + (i32 imm /*device*/)), + (SYNC)>; + +def : Pat<(atomic_fence (imm), (imm)), (SYNC)>; + +include "PPCInstrAltivec.td" +include "PPCInstr64Bit.td" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp new file mode 100644 index 0000000..4590f00 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp @@ -0,0 +1,468 @@ +//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the 32-bit PowerPC target. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "PPCJITInfo.h" +#include "PPCRelocations.h" +#include "PPCTargetMachine.h" +#include "llvm/Function.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +#define BUILD_ADDIS(RD,RS,IMM16) \ + ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) +#define BUILD_ORI(RD,RS,UIMM16) \ + ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_ORIS(RD,RS,UIMM16) \ + ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_RLDICR(RD,RS,SH,ME) \ + ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \ + (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1)) +#define BUILD_MTSPR(RS,SPR) \ + ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1)) +#define BUILD_BCCTRx(BO,BI,LINK) \ + ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1)) +#define BUILD_B(TARGET, LINK) \ + ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1)) + +// Pseudo-ops +#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16) +#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6) +#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9) +#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK) + +static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){ + intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2; + unsigned *AtI = (unsigned*)(intptr_t)At; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? 
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target + } else if (!is64Bit) { + AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[2] = BUILD_MTCTR(12); // mtctr r12 + AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl + } else { + AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address) + AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32 + AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address) + AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[5] = BUILD_MTCTR(12); // mtctr r12 + AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl + } +} + +extern "C" void PPC32CompilationCallback(); +extern "C" void PPC64CompilationCallback(); + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ + !(defined(__ppc64__) || defined(__FreeBSD__)) +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because we the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl _PPC32CompilationCallback\n" +"_PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the + // FIXME: need to save v[0-19] for altivec? + // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // PowerPC32 ABI linkage - 24 bytes + // parameters - 32 bytes + // 13 double registers - 104 bytes + // 8 int registers - 32 bytes + "mflr r0\n" + "stw r0, 8(r1)\n" + "stwu r1, -208(r1)\n" + // Save all int arg registers + "stw r10, 204(r1)\n" "stw r9, 200(r1)\n" + "stw r8, 196(r1)\n" "stw r7, 192(r1)\n" + "stw r6, 188(r1)\n" "stw r5, 184(r1)\n" + "stw r4, 180(r1)\n" "stw r3, 176(r1)\n" + // Save all call-clobbered FP regs. 
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n" + "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n" + "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n" + "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n" + "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n" + "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n" + "stfd f1, 72(r1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. + "mr r3, r0\n" + "lwz r2, 208(r1)\n" // stub's frame + "lwz r4, 8(r2)\n" // stub's lr + "li r5, 0\n" // 0 == 32 bit + "bl _PPCCompilationCallbackC\n" + "mtctr r3\n" + // Restore all int arg registers + "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" + "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n" + "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n" + "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n" + // Restore all FP arg registers + "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n" + "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n" + "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n" + "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n" + "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n" + "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n" + "lfd f1, 72(r1)\n" + // Pop 3 frames off the stack and branch to target + "lwz r1, 208(r1)\n" + "lwz r2, 8(r1)\n" + "mtlr r2\n" + "bctr\n" + ); + +#elif defined(__PPC__) && !defined(__ppc64__) +// Linux & FreeBSD / PPC 32 support + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because we the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl PPC32CompilationCallback\n" +"PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the + // FIXME: need to save v[0-19] for altivec? 
+ // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // 8 double registers - 64 bytes + // 8 int registers - 32 bytes + "mflr 0\n" + "stw 0, 4(1)\n" + "stwu 1, -104(1)\n" + // Save all int arg registers + "stw 10, 100(1)\n" "stw 9, 96(1)\n" + "stw 8, 92(1)\n" "stw 7, 88(1)\n" + "stw 6, 84(1)\n" "stw 5, 80(1)\n" + "stw 4, 76(1)\n" "stw 3, 72(1)\n" + // Save all call-clobbered FP regs. + "stfd 8, 64(1)\n" + "stfd 7, 56(1)\n" "stfd 6, 48(1)\n" + "stfd 5, 40(1)\n" "stfd 4, 32(1)\n" + "stfd 3, 24(1)\n" "stfd 2, 16(1)\n" + "stfd 1, 8(1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. + "mr 3, 0\n" + "lwz 5, 104(1)\n" // stub's frame + "lwz 4, 4(5)\n" // stub's lr + "li 5, 0\n" // 0 == 32 bit + "bl PPCCompilationCallbackC\n" + "mtctr 3\n" + // Restore all int arg registers + "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" + "lwz 8, 92(1)\n" "lwz 7, 88(1)\n" + "lwz 6, 84(1)\n" "lwz 5, 80(1)\n" + "lwz 4, 76(1)\n" "lwz 3, 72(1)\n" + // Restore all FP arg registers + "lfd 8, 64(1)\n" + "lfd 7, 56(1)\n" "lfd 6, 48(1)\n" + "lfd 5, 40(1)\n" "lfd 4, 32(1)\n" + "lfd 3, 24(1)\n" "lfd 2, 16(1)\n" + "lfd 1, 8(1)\n" + // Pop 3 frames off the stack and branch to target + "lwz 1, 104(1)\n" + "lwz 0, 4(1)\n" + "mtlr 0\n" + "bctr\n" + ); +#else +void PPC32CompilationCallback() { + llvm_unreachable("This is not a power pc, you can't execute this!"); +} +#endif + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ + defined(__ppc64__) +#ifdef __ELF__ +asm( + ".text\n" + ".align 2\n" + ".globl PPC64CompilationCallback\n" + ".section \".opd\",\"aw\"\n" + ".align 3\n" +"PPC64CompilationCallback:\n" + ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n" + ".size PPC64CompilationCallback,24\n" + ".previous\n" + ".align 4\n" + ".type PPC64CompilationCallback,@function\n" 
+".L.PPC64CompilationCallback:\n" +#else +asm( + ".text\n" + ".align 2\n" + ".globl _PPC64CompilationCallback\n" +"_PPC64CompilationCallback:\n" +#endif + // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the + // FIXME: need to save v[0-19] for altivec? + // Set up a proper stack frame + // Layout + // PowerPC64 ABI linkage - 48 bytes + // parameters - 64 bytes + // 13 double registers - 104 bytes + // 8 int registers - 64 bytes + "mflr 0\n" + "std 0, 16(1)\n" + "stdu 1, -280(1)\n" + // Save all int arg registers + "std 10, 272(1)\n" "std 9, 264(1)\n" + "std 8, 256(1)\n" "std 7, 248(1)\n" + "std 6, 240(1)\n" "std 5, 232(1)\n" + "std 4, 224(1)\n" "std 3, 216(1)\n" + // Save all call-clobbered FP regs. + "stfd 13, 208(1)\n" "stfd 12, 200(1)\n" + "stfd 11, 192(1)\n" "stfd 10, 184(1)\n" + "stfd 9, 176(1)\n" "stfd 8, 168(1)\n" + "stfd 7, 160(1)\n" "stfd 6, 152(1)\n" + "stfd 5, 144(1)\n" "stfd 4, 136(1)\n" + "stfd 3, 128(1)\n" "stfd 2, 120(1)\n" + "stfd 1, 112(1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 1. 
+ "mr 3, 0\n" // return address (still in r0) + "ld 5, 280(1)\n" // stub's frame + "ld 4, 16(5)\n" // stub's lr + "li 5, 1\n" // 1 == 64 bit +#ifdef __ELF__ + "bl PPCCompilationCallbackC\n" + "nop\n" +#else + "bl _PPCCompilationCallbackC\n" +#endif + "mtctr 3\n" + // Restore all int arg registers + "ld 10, 272(1)\n" "ld 9, 264(1)\n" + "ld 8, 256(1)\n" "ld 7, 248(1)\n" + "ld 6, 240(1)\n" "ld 5, 232(1)\n" + "ld 4, 224(1)\n" "ld 3, 216(1)\n" + // Restore all FP arg registers + "lfd 13, 208(1)\n" "lfd 12, 200(1)\n" + "lfd 11, 192(1)\n" "lfd 10, 184(1)\n" + "lfd 9, 176(1)\n" "lfd 8, 168(1)\n" + "lfd 7, 160(1)\n" "lfd 6, 152(1)\n" + "lfd 5, 144(1)\n" "lfd 4, 136(1)\n" + "lfd 3, 128(1)\n" "lfd 2, 120(1)\n" + "lfd 1, 112(1)\n" + // Pop 3 frames off the stack and branch to target + "ld 1, 280(1)\n" + "ld 0, 16(1)\n" + "mtlr 0\n" + // XXX: any special TOC handling in the ELF case for JIT? + "bctr\n" + ); +#else +void PPC64CompilationCallback() { + llvm_unreachable("This is not a power pc, you can't execute this!"); +} +#endif + +extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { + // Adjust the pointer to the address of the call instruction in the stub + // emitted by emitFunctionStub, rather than the instruction after it. + unsigned *StubCallAddr = StubCallAddrPlus4 - 1; + unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1; + + void *Target = JITCompilerFunction(StubCallAddr); + + // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite + // it to branch directly to the destination. If so, rewrite it so it does not + // need to go through the stub anymore. + unsigned OrigCallInst = *OrigCallAddr; + if ((OrigCallInst >> 26) == 18) { // Direct call. + intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? + // Clear the original target out. + OrigCallInst &= (63 << 26) | 3; + // Fill in the new target. 
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2; + // Replace the call. + *OrigCallAddr = OrigCallInst; + } + } + + // Assert that we are coming from a stub that was created with our + // emitFunctionStub. + if ((*StubCallAddr >> 26) == 18) + StubCallAddr -= 3; + else { + assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!"); + StubCallAddr -= is64Bit ? 9 : 6; + } + + // Rewrite the stub with an unconditional branch to the target, for any users + // who took the address of the stub. + EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit); + sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4); + + // Put the address of the target function to call and the address to return to + // after calling the target function in a place that is easy to get on the + // stack after we restore all regs. + return Target; +} + + + +TargetJITInfo::LazyResolverFn +PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { + JITCompilerFunction = Fn; + return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback; +} + +TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() { + // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3 + // instructions to save the caller's address if this is a lazy-compilation + // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address + // into a register and jump through it. + StubLayout Result = {10*4, 4}; + return Result; +} + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ +defined(__APPLE__) +extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +#endif + +void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // If this is just a call to an external function, emit a branch instead of a + // call. The code is the same except for one bit of the last instruction. 
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback && + Fn != (void*)(intptr_t)PPC64CompilationCallback) { + void *Addr = (void*)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 7*4); + return Addr; + } + + void *Addr = (void*)JCE.getCurrentPCValue(); + if (is64Bit) { + JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0xf9610060); // std r11, 96(r1) + } else if (TM.getSubtargetImpl()->isDarwinABI()){ + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610028); // stw r11, 40(r1) + } else { + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610024); // stw r11, 36(r1) + } + intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 10*4); + return Addr; +} + + +void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((PPC::RelocationType)MR->getRelocationType()) { + default: llvm_unreachable("Unknown relocation type!"); + case PPC::reloc_pcrel_bx: + // PC-relative relocation for b and bl instructions. 
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2; + break; + case PPC::reloc_pcrel_bcx: + // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other + // bcx instructions. + ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2; + break; + case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr + case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr + ResultPtr += MR->getConstantVal(); + + // If this is a high-part access, get the high-part. + if (MR->getRelocationType() == PPC::reloc_absolute_high) { + // If the low part will have a carry (really a borrow) from the low + // 16-bits into the high 16, add a bit to borrow from. + if (((int)ResultPtr << 16) < 0) + ResultPtr += 1 << 16; + ResultPtr >>= 16; + } + + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. + unsigned LowBits = (*RelocPos + ResultPtr) & 65535; + unsigned HighBits = *RelocPos & ~65535; + *RelocPos = LowBits | HighBits; // Slam into low 16-bits + break; + } + case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr + ResultPtr += MR->getConstantVal(); + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. + unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC; + unsigned HighBits = *RelocPos & 0xFFFF0003; + *RelocPos = LowBits | HighBits; // Slam into low 14-bits. 
+ break; + } + } + } +} + +void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Old, 7*4); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h new file mode 100644 index 0000000..47ead59 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h @@ -0,0 +1,49 @@ +//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC_JITINFO_H +#define POWERPC_JITINFO_H + +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/CodeGen/JITCodeEmitter.h" + +namespace llvm { + class PPCTargetMachine; + + class PPCJITInfo : public TargetJITInfo { + protected: + PPCTargetMachine &TM; + bool is64Bit; + public: + PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) { + useGOT = 0; + is64Bit = tmIs64Bit; + } + + virtual StubLayout getStubLayout(); + virtual void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE); + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase); + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. 
+ /// + virtual void replaceMachineCodeForFunction(void *Old, void *New); + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp new file mode 100644 index 0000000..33af426 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -0,0 +1,173 @@ +//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower PPC MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" +#include "llvm/ADT/SmallString.h" +using namespace llvm; + +static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { + return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>(); +} + + +static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ + MCContext &Ctx = AP.OutContext; + + SmallString<128> Name; + if (!MO.isGlobal()) { + assert(MO.isSymbol() && "Isn't a symbol reference"); + Name += AP.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + } else { + const GlobalValue *GV = MO.getGlobal(); + bool isImplicitlyPrivate = false; + if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB || + (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)) + isImplicitlyPrivate = true; + + AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + } + + // If the target flags on the operand changes the name of the symbol, do that + // before we 
return the symbol. + if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { + Name += "$stub"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = + getMachOMMI(AP).getFnStubEntry(Sym); + if (StubSym.getPointer()) + return Sym; + + if (MO.isGlobal()) { + StubSym = + MachineModuleInfoImpl:: + StubValueTy(AP.Mang->getSymbol(MO.getGlobal()), + !MO.getGlobal()->hasInternalLinkage()); + } else { + Name.erase(Name.end()-5, Name.end()); + StubSym = + MachineModuleInfoImpl:: + StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false); + } + return Sym; + } + + // If the symbol reference is actually to a non_lazy_ptr, not to the symbol, + // then add the suffix. + if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) { + Name += "$non_lazy_ptr"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + + MachineModuleInfoMachO &MachO = getMachOMMI(AP); + + MachineModuleInfoImpl::StubValueTy &StubSym = + (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ? + MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym); + + if (StubSym.getPointer() == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = MachineModuleInfoImpl:: + StubValueTy(AP.Mang->getSymbol(MO.getGlobal()), + !MO.getGlobal()->hasInternalLinkage()); + } + return Sym; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + AsmPrinter &Printer, bool isDarwin) { + MCContext &Ctx = Printer.OutContext; + MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; + + if (MO.getTargetFlags() & PPCII::MO_LO16) + RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : MCSymbolRefExpr::VK_PPC_GAS_LO16; + else if (MO.getTargetFlags() & PPCII::MO_HA16) + RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : MCSymbolRefExpr::VK_PPC_GAS_HA16; + + // FIXME: This isn't right, but we don't have a good way to express this in + // the MC Level, see below. 
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) + RefKind = MCSymbolRefExpr::VK_None; + + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), Ctx), + Ctx); + + // Subtract off the PIC base if required. + if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) { + const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + + const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); + Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx); + // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16, + // since it is not a symbol! + } + + return MCOperand::CreateExpr(Expr); +} + +void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP, bool isDarwin) { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + assert(0 && "unknown operand type"); + case MachineOperand::MO_Register: + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); + break; + case MachineOperand::MO_BlockAddress: + MCOp = 
GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP, + isDarwin); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h new file mode 100644 index 0000000..e2649c8 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -0,0 +1,132 @@ +//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PowerPC specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PPC_MACHINE_FUNCTION_INFO_H +#define PPC_MACHINE_FUNCTION_INFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// PPCFunctionInfo - This class is derived from MachineFunction private +/// PowerPC target-specific information for each MachineFunction. +class PPCFunctionInfo : public MachineFunctionInfo { +private: + /// FramePointerSaveIndex - Frame index of where the old frame pointer is + /// stored. Also used as an anchor for instructions that need to be altered + /// when using frame pointers (dyna_add, dyna_sub.) + int FramePointerSaveIndex; + + /// ReturnAddrSaveIndex - Frame index of where the return address is stored. + /// + int ReturnAddrSaveIndex; + + /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current + /// function. This is only valid after the initial scan of the function by + /// PEI. + bool MustSaveLR; + + /// SpillsCR - Indicates whether CR is spilled in the current function. 
+ bool SpillsCR; + + /// LRStoreRequired - The bool indicates whether there is some explicit use of + /// the LR/LR8 stack slot that is not obvious from scanning the code. This + /// requires that the code generator produce a store of LR to the stack on + /// entry, even though LR may otherwise apparently not be used. + bool LRStoreRequired; + + /// MinReservedArea - This is the frame size that is at least reserved in a + /// potential caller (parameter+linkage area). + unsigned MinReservedArea; + + /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum + /// amount the stack pointer is adjusted to make the frame bigger for tail + /// calls. Used for creating an area before the register spill area. + int TailCallSPDelta; + + /// HasFastCall - Does this function contain a fast call. Used to determine + /// how the caller's stack pointer should be calculated (epilog/dynamicalloc). + bool HasFastCall; + + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + /// VarArgsStackOffset - StackOffset for start of stack + /// arguments. + int VarArgsStackOffset; + /// VarArgsNumGPR - Index of the first unused integer + /// register for parameter passing. + unsigned VarArgsNumGPR; + /// VarArgsNumFPR - Index of the first unused double + /// register for parameter passing. 
+ unsigned VarArgsNumFPR; + +public: + explicit PPCFunctionInfo(MachineFunction &MF) + : FramePointerSaveIndex(0), + ReturnAddrSaveIndex(0), + SpillsCR(false), + LRStoreRequired(false), + MinReservedArea(0), + TailCallSPDelta(0), + HasFastCall(false), + VarArgsFrameIndex(0), + VarArgsStackOffset(0), + VarArgsNumGPR(0), + VarArgsNumFPR(0) {} + + int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } + void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } + + int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; } + void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; } + + unsigned getMinReservedArea() const { return MinReservedArea; } + void setMinReservedArea(unsigned size) { MinReservedArea = size; } + + int getTailCallSPDelta() const { return TailCallSPDelta; } + void setTailCallSPDelta(int size) { TailCallSPDelta = size; } + + /// MustSaveLR - This is set when the prolog/epilog inserter does its initial + /// scan of the function. It is true if the LR/LR8 register is ever explicitly + /// defined/clobbered in the machine function (e.g. by calls and movpctolr, + /// which is used in PIC generation), or if the LR stack slot is explicitly + /// referenced by builtin_return_address. 
+ void setMustSaveLR(bool U) { MustSaveLR = U; } + bool mustSaveLR() const { return MustSaveLR; } + + void setSpillsCR() { SpillsCR = true; } + bool isCRSpilled() const { return SpillsCR; } + + void setLRStoreRequired() { LRStoreRequired = true; } + bool isLRStoreRequired() const { return LRStoreRequired; } + + void setHasFastCall() { HasFastCall = true; } + bool hasFastCall() const { return HasFastCall;} + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + int getVarArgsStackOffset() const { return VarArgsStackOffset; } + void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } + + unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; } + void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; } + + unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } + void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } +}; + +} // end of namespace llvm + + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h new file mode 100644 index 0000000..3164e33 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h @@ -0,0 +1,6586 @@ +//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file, which was autogenerated by llvm-PerfectShuffle, contains data +// for the optimal way to build a perfect shuffle without using vperm. 
+// +//===----------------------------------------------------------------------===// + +// 31 entries have cost 0 +// 292 entries have cost 1 +// 1384 entries have cost 2 +// 3061 entries have cost 3 +// 1733 entries have cost 4 +// 60 entries have cost 5 + +// This table is 6561*4 = 26244 bytes in size. +static const unsigned PerfectShuffleTable[6561+1] = { + 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS + 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS + 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0> + 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0> + 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS + 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5> + 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0> + 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0> + 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS + 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1> + 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS + 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS + 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0> + 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5> + 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7> + 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7> + 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1> + 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS + 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0> + 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS + 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2> + 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0> + 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5> + 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1> + 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7> + 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1> + 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS + 2702272651U, 
// <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0> + 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS + 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3> + 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0> + 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6> + 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0> + 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7> + 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7> + 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS + 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0> + 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS + 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2> + 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5> + 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5> + 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS + 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2> + 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5> + 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS + 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0> + 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS + 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3> + 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3> + 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5> + 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5> + 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7> + 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7> + 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7> + 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0> + 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS + 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3> + 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5> + 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 
<7,0,0,6>, RHS + 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1> + 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6> + 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0> + 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0> + 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0> + 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS + 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3> + 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3> + 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6> + 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0> + 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7> + 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7> + 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0> + 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS + 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS + 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS + 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0> + 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5> + 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS + 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7> + 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u> + 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS + 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0> + 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS + 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1> + 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0> + 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5> + 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5> + 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7> + 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0> + 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS + 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, 
<0,0,1,1> + 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1> + 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0> + 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3> + 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS + 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5> + 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1> + 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1> + 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0> + 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS + 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2> + 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2> + 835584U, // <0,1,2,3>: Cost 0 copy LHS + 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS + 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2> + 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7> + 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2> + 835584U, // <0,1,2,u>: Cost 0 copy LHS + 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0> + 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1> + 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0> + 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3> + 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6> + 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0> + 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0> + 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1> + 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2> + 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS + 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1> + 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0> + 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4> + 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS + 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS + 2659160910U, // 
<0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1> + 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4> + 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS + 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1> + 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1> + 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2> + 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0> + 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5> + 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1> + 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1> + 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1> + 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1> + 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS + 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7> + 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3> + 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0> + 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS + 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7> + 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6> + 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1> + 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1> + 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0> + 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1> + 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0> + 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1> + 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6> + 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5> + 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0> + 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7> + 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2> + 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS + 1146331956U, // 
<0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1> + 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0> + 835584U, // <0,1,u,3>: Cost 0 copy LHS + 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS + 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS + 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7> + 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u> + 835584U, // <0,1,u,u>: Cost 0 copy LHS + 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0> + 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1> + 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2> + 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS + 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS + 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5> + 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7> + 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1> + 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS + 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0> + 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1> + 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2> + 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1> + 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5> + 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6> + 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1> + 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3> + 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0> + 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1> + 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2> + 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1> + 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5> + 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6> + 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7> + 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 
<7,0,2,2>, <7,0,2,2> + 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3> + 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1> + 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2> + 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2> + 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0> + 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5> + 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6> + 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7> + 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0> + 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0> + 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0> + 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5> + 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2> + 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1> + 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1> + 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6> + 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7> + 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0> + 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1> + 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0> + 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2> + 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2> + 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS + 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4> + 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5> + 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0> + 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7> + 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS + 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS + 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0> + 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, 
<2,6,3,7> + 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7> + 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS + 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0> + 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7> + 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2> + 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7> + 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2> + 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1> + 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2> + 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS + 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS + 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5> + 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2> + 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1> + 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS + 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0> + 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1> + 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2> + 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1> + 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5> + 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6> + 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1> + 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1> + 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0> + 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2> + 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0> + 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3> + 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5> + 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0> + 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7> + 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7> + 
2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2> + 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2> + 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1> + 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2> + 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3> + 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6> + 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5> + 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7> + 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7> + 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2> + 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3> + 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0> + 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2> + 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0> + 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6> + 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5> + 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3> + 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7> + 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0> + 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2> + 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1> + 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3> + 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3> + 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6> + 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0> + 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0> + 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7> + 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2> + 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2> + 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1> + 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2> + 2217363868U, // <0,3,4,3>: Cost 3 vmrghw 
<0,4,1,5>, <3,3,3,3> + 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6> + 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6> + 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4> + 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7> + 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2> + 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6> + 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1> + 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2> + 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3> + 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4> + 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5> + 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0> + 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7> + 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0> + 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7> + 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0> + 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3> + 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6> + 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7> + 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5> + 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6> + 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0> + 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7> + 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0> + 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u> + 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3> + 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3> + 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4> + 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7> + 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6> + 3375409082U, // <0,3,7,7>: Cost 
4 vmrglw <3,3,0,7>, <2,6,3,7> + 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3> + 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2> + 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1> + 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2> + 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3> + 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6> + 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6> + 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7> + 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7> + 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2> + 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0> + 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS + 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3> + 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0> + 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5> + 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS + 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7> + 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1> + 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS + 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1> + 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3> + 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2> + 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1> + 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4> + 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS + 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2> + 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1> + 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS + 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4> + 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5> + 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2> + 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4> + 3709218102U, 
// <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS + 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS + 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7> + 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4> + 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS + 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2> + 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4> + 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4> + 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3> + 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6> + 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS + 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4> + 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4> + 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS + 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4> + 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0> + 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5> + 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4> + 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4> + 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS + 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5> + 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5> + 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS + 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS + 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0> + 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2> + 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5> + 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS + 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6> + 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5> + 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS 
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7> + 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5> + 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3> + 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4> + 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS + 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS + 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6> + 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4> + 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4> + 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0> + 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4> + 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3> + 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4> + 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS + 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0> + 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6> + 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0> + 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0> + 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1> + 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0> + 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2> + 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u> + 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4> + 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS + 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u> + 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS + 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0> + 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS + 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3> + 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0> + 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5> + 
2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5> + 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1> + 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0> + 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS + 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS + 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1> + 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3> + 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2> + 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1> + 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5> + 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0> + 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2> + 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5> + 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0> + 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2> + 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2> + 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1> + 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5> + 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3> + 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7> + 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1> + 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5> + 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2> + 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5> + 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5> + 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1> + 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5> + 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0> + 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6> + 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5> + 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5> + 2728856466U, // 
<0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1> + 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4> + 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4> + 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3> + 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6> + 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS + 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5> + 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7> + 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS + 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0> + 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1> + 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3> + 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3> + 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5> + 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5> + 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0> + 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0> + 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0> + 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS + 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7> + 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3> + 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4> + 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS + 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0> + 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7> + 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0> + 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0> + 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2> + 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0> + 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3> + 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1> + 2728858936U, // 
<0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5> + 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0> + 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6> + 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0> + 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5> + 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS + 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS + 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3> + 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2> + 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u> + 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5> + 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0> + 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0> + 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0> + 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0> + 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1> + 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3> + 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4> + 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5> + 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0> + 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6> + 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS + 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS + 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2> + 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1> + 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3> + 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5> + 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5> + 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1> + 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6> + 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS + 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS + 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 
<4,5,6,0>, <6,2,0,3> + 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3> + 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3> + 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0> + 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS + 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0> + 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7> + 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3> + 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3> + 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2> + 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0> + 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6> + 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5> + 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6> + 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6> + 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0> + 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS + 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS + 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1> + 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5> + 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3> + 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5> + 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4> + 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS + 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6> + 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS + 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS + 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6> + 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1> + 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0> + 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3> + 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS + 
3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5> + 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0> + 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS + 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS + 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0> + 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1> + 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3> + 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5> + 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS + 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6> + 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6> + 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7> + 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7> + 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1> + 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2> + 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2> + 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0> + 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5> + 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6> + 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2> + 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS + 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS + 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1> + 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1> + 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3> + 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0> + 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5> + 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6> + 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6> + 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS + 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS + 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 
<6,0,7,0>, <0,0,0,0> + 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS + 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7> + 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3> + 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5> + 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6> + 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0> + 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7> + 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS + 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2> + 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1> + 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0> + 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3> + 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6> + 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5> + 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1> + 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7> + 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2> + 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3> + 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2> + 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2> + 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0> + 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3> + 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0> + 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7> + 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2> + 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0> + 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0> + 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3> + 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3> + 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7> + 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6> + 3803941469U, 
// <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7> + 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7> + 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7> + 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7> + 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2> + 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0> + 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3> + 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7> + 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6> + 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS + 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7> + 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7> + 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS + 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0> + 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1> + 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3> + 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2> + 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS + 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5> + 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7> + 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7> + 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7> + 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7> + 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3> + 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7> + 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7> + 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7> + 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0> + 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6> + 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0> + 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0> + 2785826342U, 
// <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0> + 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0> + 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3> + 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3> + 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6> + 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6> + 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7> + 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7> + 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7> + 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2> + 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS + 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u> + 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3> + 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6> + 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS + 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u> + 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7> + 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2> + 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS + 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS + 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0> + 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS + 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS + 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS + 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7> + 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS + 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS + 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2> + 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS + 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3> + 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1> + 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6> + 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS + 1142175952U, // <0,u,1,6>: 
Cost 2 vmrghw LHS, <u,6,3,7> + 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS + 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS + 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS + 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS + 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2> + 835584U, // <0,u,2,3>: Cost 0 copy LHS + 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS + 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS + 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7> + 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2> + 835584U, // <0,u,2,u>: Cost 0 copy LHS + 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1> + 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS + 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u> + 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0> + 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5> + 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS + 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u> + 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS + 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS + 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2> + 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS + 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3> + 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1> + 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5> + 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS + 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7> + 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS + 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS + 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS + 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0> + 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2> + 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 
<3,0,u,5>, <3,0,u,5> + 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS + 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u> + 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS + 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS + 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7> + 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS + 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3> + 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7> + 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS + 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0> + 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6> + 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u> + 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u> + 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1> + 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0> + 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u> + 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS + 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5> + 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6> + 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u> + 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS + 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS + 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS + 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS + 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3> + 835584U, // <0,u,u,3>: Cost 0 copy LHS + 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6> + 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS + 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7> + 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS + 835584U, // <0,u,u,u>: Cost 0 copy LHS + 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0> + 
1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1> + 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0> + 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0> + 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1> + 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0> + 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7> + 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0> + 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1> + 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0> + 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS + 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0> + 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS + 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1> + 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7> + 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2> + 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0> + 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS + 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2> + 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1> + 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5> + 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5> + 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7> + 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7> + 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS + 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2> + 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1> + 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3> + 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1> + 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6> + 3779406377U, // <1,0,3,5>: 
Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0> + 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7> + 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7> + 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0> + 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1> + 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5> + 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5> + 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4> + 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5> + 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS + 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7> + 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0> + 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS + 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0> + 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1> + 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3> + 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4> + 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4> + 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5> + 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0> + 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0> + 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u> + 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0> + 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS + 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6> + 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0> + 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5> + 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1> + 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6> + 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1> + 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1> + 2726237176U, // <1,0,7,0>: Cost 3 
vsldoi8 <7,0,1,0>, <7,0,1,0> + 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1> + 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0> + 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1> + 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6> + 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0> + 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0> + 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7> + 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0> + 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0> + 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1> + 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0> + 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1> + 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS + 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7> + 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0> + 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1> + 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS + 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2> + 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3> + 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5> + 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5> + 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7> + 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7> + 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1> + 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS + 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS + 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2> + 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3> + 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS + 
2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5> + 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7> + 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1> + 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS + 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1> + 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3> + 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1> + 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS + 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5> + 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7> + 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7> + 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2> + 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1> + 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2> + 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3> + 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6> + 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5> + 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7> + 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7> + 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2> + 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS + 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5> + 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0> + 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2> + 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4> + 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS + 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7> + 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7> + 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS + 2760762479U, // <1,1,5,0>: Cost 3 
vsldoi12 <1,5,0,1>, <1,5,0,1> + 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1> + 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2> + 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3> + 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4> + 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5> + 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0> + 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7> + 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5> + 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS + 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7> + 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3> + 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7> + 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS + 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5> + 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6> + 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0> + 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0> + 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2> + 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1> + 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2> + 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3> + 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6> + 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5> + 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6> + 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7> + 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2> + 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS + 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS + 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1> + 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS + 
1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS + 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7> + 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1> + 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS + 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0> + 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS + 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2> + 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS + 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5> + 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0> + 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2> + 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0> + 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS + 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2> + 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1> + 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2> + 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS + 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5> + 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6> + 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7> + 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7> + 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS + 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS + 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2> + 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2> + 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS + 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS + 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5> + 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7> + 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7> + 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS + 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS + 1544340276U, // 
<1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2> + 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3> + 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS + 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5> + 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3> + 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS + 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2> + 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5> + 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2> + 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS + 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4> + 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS + 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7> + 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0> + 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS + 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2> + 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5> + 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2> + 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS + 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6> + 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5> + 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0> + 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0> + 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS + 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2> + 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2> + 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3> + 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS + 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS + 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7> + 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 
<7,0,1,2>, <6,6,6,6> + 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2> + 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS + 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2> + 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1> + 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2> + 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS + 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6> + 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1> + 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1> + 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1> + 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2> + 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS + 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2> + 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2> + 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS + 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS + 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3> + 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS + 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1> + 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS + 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3> + 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3> + 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1> + 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5> + 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7> + 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7> + 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS + 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0> + 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1> + 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3> + 
2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3> + 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4> + 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5> + 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7> + 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7> + 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u> + 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2> + 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3> + 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2> + 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1> + 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6> + 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5> + 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7> + 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7> + 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2> + 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1> + 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1> + 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0> + 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3> + 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4> + 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3> + 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7> + 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7> + 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1> + 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1> + 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1> + 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3> + 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3> + 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4> + 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS + 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6> + 
3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7> + 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS + 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0> + 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1> + 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5> + 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3> + 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4> + 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5> + 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6> + 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7> + 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u> + 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2> + 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7> + 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6> + 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3> + 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4> + 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5> + 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6> + 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7> + 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7> + 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1> + 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7> + 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7> + 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3> + 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4> + 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1> + 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7> + 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7> + 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1> + 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0> + 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS + 2630141478U, 
// <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u> + 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3> + 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4> + 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS + 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7> + 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7> + 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS + 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4> + 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS + 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4> + 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4> + 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5> + 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1> + 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7> + 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4> + 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1> + 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1> + 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4> + 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0> + 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4> + 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4> + 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS + 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7> + 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0> + 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS + 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1> + 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2> + 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2> + 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1> + 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4> + 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS + 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 
<0,3,1,4>, <2,6,3,7> + 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7> + 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS + 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4> + 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0> + 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0> + 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3> + 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6> + 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5> + 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0> + 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7> + 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4> + 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0> + 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1> + 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1> + 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3> + 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4> + 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS + 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6> + 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4> + 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS + 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS + 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1> + 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1> + 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3> + 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4> + 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5> + 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS + 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2> + 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS + 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4> + 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7> + 
3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3> + 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5> + 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4> + 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS + 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6> + 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1> + 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS + 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4> + 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1> + 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0> + 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5> + 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6> + 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5> + 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0> + 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7> + 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4> + 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS + 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS + 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1> + 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4> + 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6> + 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS + 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS + 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4> + 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS + 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0> + 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS + 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5> + 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0> + 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5> + 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5> + 2781777524U, // <1,5,0,6>: Cost 3 
vsldoi12 <5,0,6,1>, <5,0,6,1> + 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0> + 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS + 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1> + 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1> + 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0> + 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0> + 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5> + 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1> + 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7> + 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7> + 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0> + 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0> + 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5> + 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2> + 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1> + 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2> + 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5> + 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7> + 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7> + 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1> + 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2> + 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1> + 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2> + 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3> + 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6> + 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5> + 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6> + 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1> + 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2> + 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS + 2687126498U, // <1,5,4,1>: Cost 3 
vsldoi8 <0,4,1,5>, <4,1,5,0> + 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5> + 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4> + 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4> + 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS + 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6> + 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4> + 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS + 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS + 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5> + 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5> + 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3> + 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS + 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5> + 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6> + 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7> + 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u> + 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1> + 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7> + 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3> + 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4> + 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6> + 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5> + 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6> + 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0> + 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0> + 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2> + 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1> + 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3> + 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1> + 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6> + 3389418714U, // <1,5,7,5>: Cost 4 vmrglw 
<5,6,1,7>, <4,4,5,5> + 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6> + 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7> + 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2> + 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2> + 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS + 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3> + 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1> + 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5> + 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS + 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7> + 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0> + 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS + 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0> + 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS + 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6> + 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1> + 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5> + 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6> + 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6> + 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS + 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS + 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS + 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1> + 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3> + 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7> + 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS + 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6> + 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6> + 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS + 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS + 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS + 3759547961U, // <1,6,2,1>: Cost 
4 vsldoi8 <0,2,1,6>, <2,1,6,0> + 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3> + 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1> + 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS + 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5> + 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7> + 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS + 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS + 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2> + 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7> + 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3> + 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3> + 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6> + 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7> + 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6> + 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS + 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS + 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS + 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5> + 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4> + 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6> + 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS + 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS + 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5> + 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS + 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS + 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS + 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7> + 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2> + 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3> + 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS + 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5> + 2332250936U, // <1,6,5,6>: 
Cost 3 vmrglw <u,4,1,5>, <6,6,6,6> + 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS + 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS + 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1> + 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6> + 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7> + 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3> + 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS + 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6> + 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6> + 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS + 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS + 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1> + 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1> + 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3> + 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0> + 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5> + 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5> + 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7> + 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS + 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1> + 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1> + 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS + 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1> + 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1> + 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5> + 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS + 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7> + 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS + 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS + 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1> + 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS + 
3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2> + 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3> + 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1> + 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0> + 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7> + 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7> + 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1> + 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1> + 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1> + 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3> + 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3> + 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS + 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1> + 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7> + 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7> + 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7> + 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS + 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3> + 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2> + 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2> + 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS + 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5> + 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2> + 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7> + 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS + 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1> + 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1> + 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1> + 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3> + 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6> + 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1> + 3780127361U, 
// <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7> + 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0> + 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1> + 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1> + 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7> + 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3> + 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3> + 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4> + 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS + 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7> + 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6> + 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS + 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS + 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u> + 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7> + 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3> + 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS + 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5> + 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7> + 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7> + 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7> + 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0> + 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0> + 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7> + 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6> + 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS + 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7> + 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6> + 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0> + 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0> + 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1> + 3365528426U, // 
<1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1> + 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1> + 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3> + 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5> + 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7> + 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7> + 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7> + 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1> + 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u> + 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS + 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2> + 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3> + 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS + 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS + 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u> + 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7> + 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS + 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u> + 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS + 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u> + 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS + 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u> + 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1> + 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u> + 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS + 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS + 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS + 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS + 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS + 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS + 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS + 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS + 2687149326U, // <1,u,1,6>: Cost 3 
vsldoi8 <0,4,1,u>, <1,6,u,7> + 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS + 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS + 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2> + 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS + 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS + 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6> + 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS + 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7> + 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS + 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS + 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS + 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2> + 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2> + 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS + 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5> + 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3> + 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS + 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u> + 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0> + 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0> + 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS + 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4> + 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS + 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6> + 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS + 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS + 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0> + 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1> + 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2> + 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS 
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4> + 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5> + 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS + 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS + 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS + 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u> + 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7> + 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3> + 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS + 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5> + 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS + 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6> + 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS + 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS + 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u> + 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1> + 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3> + 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS + 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5> + 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5> + 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7> + 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS + 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u> + 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS + 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS + 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2> + 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2> + 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS + 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS + 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS + 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS + 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS + 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0> + 
2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS + 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1> + 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1> + 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5> + 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5> + 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2> + 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0> + 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS + 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1> + 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS + 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS + 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2> + 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5> + 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1> + 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7> + 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7> + 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS + 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0> + 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS + 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2> + 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0> + 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5> + 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5> + 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7> + 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7> + 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS + 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0> + 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1> + 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3> + 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3> + 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4> + 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, 
<5,6,7,0> + 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7> + 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3> + 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u> + 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0> + 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5> + 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5> + 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5> + 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5> + 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS + 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2> + 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5> + 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5> + 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0> + 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS + 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5> + 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3> + 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5> + 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5> + 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7> + 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7> + 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS + 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0> + 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS + 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6> + 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0> + 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5> + 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0> + 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7> + 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0> + 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS + 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0> + 2231189606U, // 
<2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS + 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0> + 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0> + 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6> + 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5> + 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0> + 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7> + 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS + 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0> + 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1> + 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS + 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u> + 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4> + 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0> + 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7> + 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u> + 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u> + 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0> + 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2> + 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2> + 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0> + 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS + 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4> + 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2> + 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7> + 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2> + 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1> + 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1> + 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0> + 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3> + 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS + 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, 
<1,1,5,2> + 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7> + 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1> + 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1> + 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2> + 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1> + 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2> + 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2> + 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS + 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5> + 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7> + 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0> + 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS + 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0> + 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1> + 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2> + 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3> + 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS + 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5> + 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6> + 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7> + 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u> + 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4> + 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5> + 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2> + 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2> + 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS + 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS + 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2> + 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7> + 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS + 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5> + 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1> + 
3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6> + 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6> + 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS + 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5> + 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7> + 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0> + 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS + 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6> + 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1> + 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7> + 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2> + 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS + 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5> + 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6> + 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1> + 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS + 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2> + 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1> + 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2> + 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2> + 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6> + 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5> + 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7> + 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3> + 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2> + 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0> + 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1> + 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2> + 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3> + 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4> + 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5> + 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6> + 
2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7> + 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u> + 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0> + 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS + 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2> + 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1> + 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5> + 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6> + 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7> + 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0> + 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS + 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1> + 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1> + 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0> + 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS + 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5> + 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2> + 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7> + 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7> + 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS + 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS + 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1> + 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS + 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS + 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS + 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5> + 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7> + 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2> + 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS + 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2> + 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2> + 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2> + 135053414U, // <2,2,3,3>: 
Cost 1 vmrglw LHS, LHS + 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6> + 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5> + 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6> + 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3> + 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS + 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS + 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3> + 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5> + 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS + 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4> + 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS + 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7> + 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7> + 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS + 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2> + 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1> + 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2> + 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS + 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6> + 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5> + 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0> + 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7> + 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS + 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1> + 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1> + 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7> + 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7> + 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5> + 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6> + 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7> + 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2> + 1157056442U, // <2,2,6,u>: Cost 2 
vmrghw <2,6,3,7>, <2,6,3,7> + 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1> + 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1> + 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2> + 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS + 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6> + 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5> + 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7> + 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7> + 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS + 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS + 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS + 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS + 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS + 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS + 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS + 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7> + 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u> + 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS + 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0> + 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS + 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2> + 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0> + 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5> + 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7> + 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7> + 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0> + 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS + 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2> + 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1> + 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0> + 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1> + 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6> + 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1> + 
2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7> + 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7> + 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0> + 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0> + 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1> + 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2> + 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1> + 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5> + 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6> + 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7> + 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7> + 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3> + 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0> + 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1> + 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3> + 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3> + 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4> + 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5> + 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6> + 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7> + 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u> + 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS + 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4> + 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2> + 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2> + 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS + 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS + 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2> + 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4> + 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS + 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2> + 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1> + 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3> + 3763580721U, // <2,3,5,3>: Cost 4 
vsldoi8 LHS, <5,3,0,1> + 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6> + 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5> + 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0> + 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7> + 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0> + 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2> + 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1> + 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3> + 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3> + 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6> + 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1> + 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6> + 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1> + 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1> + 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2> + 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3> + 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3> + 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1> + 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6> + 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5> + 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7> + 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7> + 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2> + 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0> + 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS + 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0> + 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3> + 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4> + 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS + 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7> + 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7> + 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS + 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0> + 2697805926U, 
// <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS + 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2> + 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u> + 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5> + 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS + 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1> + 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1> + 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS + 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2> + 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0> + 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0> + 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4> + 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4> + 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS + 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7> + 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4> + 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS + 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1> + 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4> + 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4> + 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3> + 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4> + 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS + 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7> + 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7> + 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS + 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS + 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1> + 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3> + 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3> + 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4> + 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5> + 
2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7> + 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3> + 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u> + 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS + 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4> + 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4> + 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3> + 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4> + 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS + 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7> + 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5> + 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS + 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS + 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5> + 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5> + 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2> + 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS + 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS + 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS + 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0> + 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS + 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1> + 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3> + 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6> + 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7> + 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4> + 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS + 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7> + 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4> + 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS + 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2> + 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, 
<4,7,1,2> + 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4> + 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4> + 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4> + 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0> + 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4> + 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0> + 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0> + 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS + 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1> + 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u> + 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u> + 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4> + 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5> + 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS + 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u> + 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u> + 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0> + 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS + 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2> + 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0> + 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5> + 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1> + 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1> + 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0> + 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS + 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1> + 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1> + 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0> + 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3> + 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5> + 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5> + 3366879607U, // 
<2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6> + 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0> + 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5> + 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS + 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1> + 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2> + 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1> + 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5> + 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5> + 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6> + 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7> + 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u> + 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0> + 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1> + 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5> + 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3> + 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4> + 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5> + 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6> + 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7> + 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u> + 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS + 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0> + 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0> + 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3> + 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4> + 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS + 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6> + 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7> + 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS + 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS + 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0> + 3704669830U, // <2,5,5,2>: 
Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5> + 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3> + 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS + 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5> + 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6> + 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0> + 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5> + 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS + 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1> + 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3> + 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4> + 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5> + 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5> + 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0> + 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2> + 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2> + 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2> + 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5> + 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3> + 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2> + 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6> + 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5> + 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6> + 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3> + 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u> + 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0> + 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1> + 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5> + 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3> + 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4> + 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5> + 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6> + 
2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7> + 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u> + 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0> + 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS + 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6> + 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0> + 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5> + 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5> + 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1> + 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS + 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS + 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS + 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6> + 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0> + 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2> + 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS + 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6> + 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6> + 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS + 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS + 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS + 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1> + 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2> + 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6> + 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS + 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6> + 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7> + 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS + 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS + 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS + 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1> + 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7> + 2287184230U, // 
<2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3> + 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS + 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3> + 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6> + 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS + 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS + 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS + 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4> + 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6> + 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4> + 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS + 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS + 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2> + 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS + 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS + 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2> + 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6> + 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3> + 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5> + 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6> + 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6> + 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6> + 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS + 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS + 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS + 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1> + 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2> + 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0> + 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS + 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5> + 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6> + 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS + 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, 
RHS + 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1> + 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2> + 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2> + 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0> + 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5> + 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5> + 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2> + 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS + 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS + 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS + 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS + 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7> + 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3> + 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS + 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS + 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6> + 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS + 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS + 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS + 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2> + 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2> + 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3> + 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS + 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6> + 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0> + 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2> + 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2> + 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS + 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1> + 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0> + 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3> + 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS + 
3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1> + 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7> + 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7> + 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3> + 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS + 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1> + 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2> + 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3> + 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS + 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6> + 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7> + 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7> + 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7> + 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS + 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1> + 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7> + 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3> + 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS + 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5> + 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3> + 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7> + 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS + 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS + 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2> + 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2> + 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3> + 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS + 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS + 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4> + 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0> + 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS + 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, 
<7,5,0,1> + 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0> + 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2> + 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3> + 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS + 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5> + 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7> + 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7> + 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3> + 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2> + 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2> + 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3> + 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3> + 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6> + 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5> + 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6> + 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7> + 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3> + 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1> + 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7> + 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2> + 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3> + 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS + 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5> + 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7> + 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7> + 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1> + 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS + 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2> + 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7> + 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3> + 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS + 
2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS + 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u> + 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7> + 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS + 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0> + 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS + 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2> + 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0> + 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5> + 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS + 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7> + 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0> + 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS + 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2> + 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1> + 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0> + 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS + 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6> + 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1> + 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7> + 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS + 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0> + 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS + 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS + 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS + 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1> + 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS + 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS + 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7> + 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS + 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS + 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2> + 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1> + 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2> + 135053468U, // 
<2,u,3,3>: Cost 1 vmrglw LHS, LHS + 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6> + 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5> + 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6> + 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS + 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS + 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS + 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4> + 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2> + 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2> + 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS + 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS + 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2> + 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS + 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS + 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2> + 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS + 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5> + 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS + 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6> + 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5> + 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS + 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS + 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS + 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2> + 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS + 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3> + 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7> + 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6> + 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS + 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6> + 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1> + 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS + 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2> + 2727040090U, 
// <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u> + 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u> + 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS + 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6> + 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5> + 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u> + 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7> + 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2> + 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0> + 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS + 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS + 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS + 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4> + 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS + 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6> + 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS + 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS + 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0> + 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1> + 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0> + 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3> + 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1> + 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5> + 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6> + 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7> + 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1> + 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS + 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1> + 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS + 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0> + 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS + 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5> + 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7> + 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, 
<7,3,0,1> + 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS + 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0> + 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2> + 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2> + 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1> + 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1> + 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6> + 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7> + 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0> + 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2> + 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2> + 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS + 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3> + 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0> + 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6> + 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6> + 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7> + 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7> + 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS + 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0> + 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5> + 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4> + 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4> + 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5> + 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS + 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7> + 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7> + 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5> + 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2> + 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5> + 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0> + 3403486842U, // <3,0,5,3>: Cost 4 vmrglw 
<u,0,3,5>, <7,u,0,3> + 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6> + 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5> + 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7> + 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0> + 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5> + 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7> + 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS + 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3> + 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7> + 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5> + 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7> + 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6> + 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0> + 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS + 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0> + 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1> + 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0> + 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3> + 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6> + 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0> + 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3> + 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7> + 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u> + 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS + 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1> + 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS + 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1> + 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS + 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS + 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7> + 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u> + 604816083U, // <3,0,u,u>: Cost 1 
vsldoi12 LHS, LHS + 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0> + 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2> + 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2> + 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2> + 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS + 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5> + 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7> + 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7> + 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS + 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1> + 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1> + 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1> + 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3> + 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5> + 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5> + 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1> + 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1> + 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1> + 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1> + 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3> + 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2> + 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0> + 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS + 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3> + 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3> + 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0> + 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0> + 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3> + 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1> + 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2> + 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3> + 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, 
RHS + 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5> + 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7> + 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7> + 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS + 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4> + 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1> + 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2> + 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6> + 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS + 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6> + 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6> + 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7> + 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS + 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1> + 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1> + 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2> + 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0> + 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5> + 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5> + 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0> + 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1> + 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1> + 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1> + 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7> + 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0> + 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7> + 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5> + 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7> + 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7> + 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0> + 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7> + 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS + 2297905162U, // 
<3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1> + 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2> + 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3> + 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS + 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5> + 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6> + 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7> + 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u> + 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u> + 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1> + 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2> + 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0> + 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS + 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3> + 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6> + 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0> + 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0> + 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0> + 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1> + 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2> + 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS + 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4> + 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5> + 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7> + 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7> + 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS + 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS + 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1> + 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2> + 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1> + 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS + 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0> + 3830687289U, // 
<3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0> + 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2> + 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1> + 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS + 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2> + 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2> + 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3> + 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS + 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5> + 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7> + 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7> + 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3> + 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1> + 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1> + 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2> + 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS + 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5> + 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5> + 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7> + 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0> + 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1> + 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS + 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1> + 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2> + 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS + 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4> + 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS + 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7> + 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4> + 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS + 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS + 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1> + 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, 
<2,2,2,2> + 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6> + 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS + 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6> + 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7> + 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7> + 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6> + 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS + 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1> + 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6> + 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7> + 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS + 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5> + 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7> + 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2> + 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7> + 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1> + 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1> + 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2> + 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS + 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS + 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5> + 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6> + 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7> + 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS + 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1> + 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1> + 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2> + 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3> + 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5> + 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS + 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7> + 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0> + 1679001731U, // 
<3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1> + 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0> + 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2> + 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0> + 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3> + 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1> + 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5> + 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6> + 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7> + 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2> + 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0> + 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1> + 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3> + 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3> + 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0> + 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0> + 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1> + 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7> + 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3> + 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS + 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3> + 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3> + 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0> + 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS + 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u> + 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3> + 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3> + 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3> + 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS + 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1> + 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3> + 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS + 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 
<3,3,3,3>, RHS + 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5> + 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7> + 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7> + 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS + 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0> + 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4> + 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4> + 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3> + 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4> + 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6> + 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6> + 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7> + 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6> + 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1> + 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1> + 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3> + 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3> + 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5> + 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5> + 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7> + 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7> + 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7> + 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7> + 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3> + 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7> + 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7> + 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7> + 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7> + 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6> + 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3> + 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3> + 2631458918U, // <3,3,7,0>: Cost 3 
vsldoi4 <2,3,3,7>, LHS + 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1> + 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7> + 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3> + 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS + 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5> + 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6> + 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7> + 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7> + 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0> + 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2> + 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3> + 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS + 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS + 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6> + 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3> + 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7> + 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS + 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0> + 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS + 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0> + 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0> + 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5> + 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS + 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7> + 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7> + 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS + 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS + 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1> + 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4> + 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4> + 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS + 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0> + 
3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7> + 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1> + 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4> + 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0> + 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3> + 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2> + 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1> + 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4> + 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS + 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7> + 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4> + 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1> + 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2> + 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1> + 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3> + 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3> + 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1> + 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS + 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7> + 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7> + 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS + 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS + 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4> + 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4> + 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3> + 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4> + 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS + 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6> + 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7> + 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS + 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS + 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 
<2,3,4,5>, <1,1,1,1> + 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5> + 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2> + 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS + 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5> + 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS + 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5> + 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS + 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS + 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7> + 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3> + 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7> + 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7> + 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS + 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6> + 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4> + 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS + 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2> + 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1> + 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4> + 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3> + 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6> + 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5> + 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6> + 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7> + 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u> + 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS + 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS + 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u> + 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1> + 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS + 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS + 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS 
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u> + 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS + 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS + 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2> + 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2> + 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2> + 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1> + 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1> + 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6> + 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7> + 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1> + 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1> + 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1> + 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5> + 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5> + 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5> + 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0> + 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5> + 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0> + 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1> + 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4> + 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5> + 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2> + 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5> + 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5> + 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3> + 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3> + 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3> + 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5> + 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS + 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1> + 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3> 
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3> + 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5> + 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5> + 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6> + 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7> + 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u> + 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS + 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1> + 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3> + 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6> + 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5> + 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6> + 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6> + 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7> + 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6> + 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1> + 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1> + 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5> + 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3> + 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5> + 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6> + 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7> + 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1> + 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7> + 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6> + 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4> + 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5> + 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7> + 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6> + 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0> + 
1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0> + 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS + 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1> + 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7> + 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3> + 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS + 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5> + 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6> + 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7> + 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u> + 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1> + 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2> + 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2> + 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4> + 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4> + 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6> + 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0> + 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0> + 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0> + 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS + 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1> + 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3> + 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5> + 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0> + 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1> + 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS + 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS + 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1> + 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1> + 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6> + 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6> + 3873821072U, 
// <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5> + 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6> + 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6> + 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS + 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS + 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS + 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6> + 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2> + 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6> + 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS + 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6> + 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3> + 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3> + 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3> + 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2> + 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1> + 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3> + 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3> + 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6> + 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6> + 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6> + 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS + 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS + 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2> + 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4> + 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5> + 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5> + 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6> + 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS + 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5> + 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS + 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS + 3873821356U, 
// <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1> + 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1> + 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0> + 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0> + 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5> + 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5> + 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7> + 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS + 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS + 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1> + 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1> + 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2> + 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3> + 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5> + 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6> + 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6> + 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7> + 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7> + 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1> + 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1> + 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3> + 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3> + 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5> + 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5> + 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6> + 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS + 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS + 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1> + 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS + 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1> + 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3> + 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5> + 2699892890U, // <3,6,u,5>: 
Cost 3 vsldoi8 <2,5,3,6>, RHS + 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6> + 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS + 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS + 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0> + 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6> + 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3> + 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5> + 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6> + 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0> + 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1> + 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1> + 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1> + 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0> + 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7> + 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5> + 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1> + 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7> + 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7> + 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0> + 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1> + 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7> + 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2> + 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7> + 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5> + 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7> + 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7> + 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3> + 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7> + 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, 
<3,0,1,2> + 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1> + 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3> + 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3> + 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6> + 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6> + 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7> + 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1> + 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2> + 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS + 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3> + 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7> + 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3> + 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS + 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS + 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4> + 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5> + 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS + 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2> + 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0> + 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4> + 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1> + 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6> + 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5> + 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7> + 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0> + 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7> + 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7> + 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7> + 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7> + 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0> + 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7> + 3862910472U, // <3,7,6,5>: Cost 4 
vsldoi12 <6,2,7,3>, <7,6,5,6> + 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6> + 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0> + 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7> + 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS + 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1> + 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7> + 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3> + 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS + 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5> + 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7> + 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7> + 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7> + 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2> + 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0> + 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1> + 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6> + 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS + 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7> + 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7> + 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS + 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0> + 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2> + 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2> + 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS + 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1> + 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS + 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6> + 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS + 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2> + 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS + 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, 
<1,1,1,1> + 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS + 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1> + 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS + 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0> + 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7> + 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS + 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS + 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u> + 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3> + 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u> + 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3> + 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS + 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u> + 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u> + 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3> + 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3> + 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1> + 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS + 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2> + 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS + 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5> + 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS + 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7> + 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS + 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS + 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0> + 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS + 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2> + 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS + 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4> + 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6> + 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6> + 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS + 1679005786U, // 
<3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6> + 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS + 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1> + 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5> + 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6> + 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS + 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5> + 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS + 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS + 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS + 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS + 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7> + 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6> + 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7> + 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS + 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS + 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6> + 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0> + 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7> + 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1> + 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1> + 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7> + 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS + 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5> + 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5> + 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6> + 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS + 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS + 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1> + 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2> + 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS + 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS + 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5> + 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, 
<u,u,5,6> + 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS + 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS + 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS + 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0> + 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS + 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0> + 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3> + 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4> + 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0> + 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7> + 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7> + 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS + 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS + 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4> + 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1> + 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS + 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4> + 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1> + 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1> + 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS + 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS + 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0> + 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0> + 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4> + 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1> + 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7> + 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7> + 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0> + 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS + 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, 
<0,3,1,4> + 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3> + 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4> + 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS + 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0> + 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7> + 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0> + 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4> + 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1> + 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS + 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3> + 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4> + 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5> + 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS + 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7> + 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4> + 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS + 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0> + 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS + 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2> + 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5> + 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5> + 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5> + 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7> + 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5> + 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS + 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1> + 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS + 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3> + 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0> + 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7> + 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1> + 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6> + 
2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0> + 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS + 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0> + 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS + 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3> + 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0> + 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5> + 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0> + 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7> + 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7> + 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS + 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0> + 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS + 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u> + 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5> + 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS + 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7> + 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u> + 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS + 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS + 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS + 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4> + 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0> + 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5> + 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5> + 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3> + 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2> + 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1> + 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1> + 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4> + 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4> + 3361057810U, // 
<4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3> + 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4> + 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5> + 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7> + 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7> + 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4> + 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS + 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4> + 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2> + 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4> + 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS + 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5> + 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7> + 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2> + 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4> + 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1> + 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4> + 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4> + 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3> + 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6> + 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5> + 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7> + 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1> + 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1> + 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS + 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1> + 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4> + 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4> + 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4> + 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS + 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7> + 3867116636U, // <4,1,4,7>: 
Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0> + 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS + 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5> + 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1> + 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0> + 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2> + 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS + 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5> + 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7> + 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2> + 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS + 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2> + 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7> + 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4> + 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5> + 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7> + 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5> + 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6> + 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1> + 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7> + 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1> + 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1> + 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2> + 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1> + 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6> + 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5> + 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6> + 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7> + 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1> + 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u> + 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1> + 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0> + 
1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4> + 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS + 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5> + 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7> + 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2> + 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS + 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS + 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS + 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2> + 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS + 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5> + 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1> + 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7> + 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1> + 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS + 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS + 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5> + 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2> + 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS + 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5> + 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0> + 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4> + 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7> + 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS + 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1> + 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4> + 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2> + 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3> + 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0> + 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5> + 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7> + 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, 
<7,4,2,2> + 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3> + 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1> + 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1> + 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2> + 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4> + 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0> + 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4> + 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7> + 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4> + 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1> + 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1> + 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5> + 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2> + 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS + 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4> + 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5> + 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7> + 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2> + 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS + 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS + 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5> + 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2> + 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS + 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS + 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6> + 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7> + 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1> + 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS + 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS + 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4> + 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7> + 2759018426U, // <4,2,6,3>: Cost 3 
vsldoi12 <1,2,3,4>, <2,6,3,7> + 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7> + 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4> + 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7> + 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2> + 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7> + 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1> + 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2> + 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2> + 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS + 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4> + 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5> + 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7> + 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1> + 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS + 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1> + 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u> + 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2> + 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS + 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5> + 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6> + 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7> + 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4> + 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS + 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2> + 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2> + 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3> + 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3> + 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4> + 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4> + 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7> + 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7> + 
2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2> + 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0> + 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1> + 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4> + 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3> + 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4> + 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5> + 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6> + 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7> + 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4> + 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3> + 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4> + 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2> + 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4> + 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3> + 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u> + 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3> + 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3> + 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4> + 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2> + 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3> + 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3> + 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3> + 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4> + 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5> + 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7> + 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7> + 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3> + 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1> + 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2> + 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 
<2,4,3,4>, <2,4,3,4> + 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3> + 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5> + 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6> + 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7> + 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7> + 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1> + 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2> + 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1> + 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5> + 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3> + 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6> + 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5> + 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7> + 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7> + 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2> + 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7> + 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4> + 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3> + 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6> + 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7> + 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5> + 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6> + 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4> + 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7> + 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0> + 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1> + 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3> + 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3> + 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4> + 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4> + 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6> + 
3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7> + 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3> + 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2> + 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2> + 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u> + 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3> + 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6> + 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6> + 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7> + 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7> + 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2> + 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1> + 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS + 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4> + 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1> + 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5> + 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS + 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2> + 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7> + 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS + 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0> + 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1> + 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3> + 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3> + 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4> + 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS + 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7> + 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7> + 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3> + 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS + 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4> + 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, 
<4,2,5,3> + 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4> + 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4> + 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS + 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7> + 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3> + 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4> + 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2> + 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4> + 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4> + 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3> + 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4> + 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS + 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4> + 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7> + 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3> + 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS + 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1> + 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2> + 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4> + 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS + 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS + 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3> + 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4> + 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS + 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1> + 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4> + 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2> + 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5> + 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5> + 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS + 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0> + 94817833U, // 
<4,4,5,u>: Cost 1 vmrghw RHS, RHS + 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1> + 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5> + 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3> + 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4> + 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4> + 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS + 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2> + 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4> + 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS + 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2> + 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1> + 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3> + 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3> + 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4> + 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS + 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7> + 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7> + 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS + 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1> + 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS + 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2> + 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u> + 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS + 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS + 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0> + 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS + 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0> + 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS + 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5> + 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0> + 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5> 
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5> + 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5> + 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0> + 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS + 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0> + 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5> + 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0> + 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3> + 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5> + 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5> + 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6> + 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7> + 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5> + 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS + 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5> + 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5> + 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5> + 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5> + 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6> + 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7> + 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5> + 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5> + 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2> + 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4> + 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4> + 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3> + 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0> + 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5> + 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4> + 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5> + 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2> + 
2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS + 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1> + 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3> + 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4> + 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5> + 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS + 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7> + 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4> + 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS + 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS + 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1> + 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5> + 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3> + 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5> + 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5> + 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0> + 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7> + 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0> + 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS + 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6> + 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6> + 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6> + 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS + 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3> + 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6> + 27705344U, // <4,5,6,7>: Cost 0 copy RHS + 27705344U, // <4,5,6,u>: Cost 0 copy RHS + 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2> + 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4> + 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4> + 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5> + 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0> + 3317239812U, // 
<4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5> + 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4> + 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7> + 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2> + 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS + 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS + 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0> + 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u> + 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS + 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS + 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0> + 27705344U, // <4,5,u,7>: Cost 0 copy RHS + 27705344U, // <4,5,u,u>: Cost 0 copy RHS + 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0> + 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS + 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3> + 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4> + 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5> + 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1> + 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6> + 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS + 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS + 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0> + 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1> + 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4> + 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3> + 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4> + 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6> + 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6> + 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS + 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS + 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS + 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, 
<2,1,3,5> + 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2> + 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5> + 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3> + 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4> + 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7> + 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3> + 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3> + 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4> + 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2> + 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4> + 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4> + 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6> + 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6> + 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6> + 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS + 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS + 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1> + 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1> + 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3> + 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4> + 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4> + 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5> + 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6> + 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS + 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS + 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2> + 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1> + 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3> + 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5> + 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6> + 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1> + 1168569144U, // <4,6,5,6>: Cost 2 vmrghw 
RHS, <6,6,6,6> + 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS + 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS + 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2> + 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1> + 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3> + 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5> + 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4> + 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1> + 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6> + 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7> + 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7> + 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1> + 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1> + 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3> + 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4> + 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5> + 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2> + 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6> + 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS + 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS + 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2> + 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1> + 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3> + 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5> + 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6> + 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1> + 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6> + 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS + 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS + 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2> + 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS + 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3> + 
3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4> + 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5> + 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6> + 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0> + 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7> + 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS + 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0> + 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1> + 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4> + 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3> + 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4> + 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5> + 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7> + 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7> + 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4> + 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3> + 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4> + 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2> + 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4> + 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3> + 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4> + 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7> + 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2> + 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4> + 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2> + 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4> + 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3> + 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7> + 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4> + 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7> + 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, 
<3,6,4,7> + 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7> + 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7> + 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1> + 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3> + 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3> + 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3> + 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4> + 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS + 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4> + 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7> + 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS + 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2> + 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1> + 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3> + 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1> + 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6> + 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5> + 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7> + 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7> + 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2> + 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2> + 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1> + 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4> + 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3> + 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7> + 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4> + 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7> + 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4> + 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4> + 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2> + 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4> + 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, 
<7,2,6,3> + 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3> + 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6> + 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5> + 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7> + 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7> + 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2> + 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2> + 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS + 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2> + 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1> + 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6> + 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS + 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7> + 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7> + 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2> + 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1> + 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS + 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u> + 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS + 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u> + 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS + 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u> + 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS + 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS + 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS + 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u> + 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS + 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4> + 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u> + 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6> + 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS + 1685280612U, // <4,u,1,u>: Cost 2 
vsldoi12 <1,2,3,4>, LHS + 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS + 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS + 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u> + 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u> + 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3> + 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS + 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7> + 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3> + 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u> + 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1> + 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4> + 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u> + 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4> + 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5> + 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS + 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u> + 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS + 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1> + 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS + 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS + 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3> + 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS + 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS + 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS + 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7> + 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS + 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS + 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2> + 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS + 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0> + 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS + 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6> + 94820506U, // <4,u,5,5>: 
Cost 1 vmrghw RHS, RHS + 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS + 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS + 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS + 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6> + 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6> + 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6> + 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS + 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS + 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6> + 27705344U, // <4,u,6,7>: Cost 0 copy RHS + 27705344U, // <4,u,6,u>: Cost 0 copy RHS + 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1> + 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS + 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u> + 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS + 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5> + 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS + 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u> + 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS + 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS + 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS + 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS + 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS + 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS + 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS + 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS + 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS + 27705344U, // <4,u,u,7>: Cost 0 copy RHS + 27705344U, // <4,u,u,u>: Cost 0 copy RHS + 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0> + 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1> + 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5> + 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3> + 
2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1> + 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5> + 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7> + 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7> + 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1> + 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS + 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1> + 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0> + 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS + 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1> + 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7> + 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7> + 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0> + 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS + 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2> + 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5> + 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1> + 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3> + 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7> + 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1> + 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS + 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2> + 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0> + 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4> + 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3> + 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0> + 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5> + 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4> + 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7> + 2707950400U, // 
<5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0> + 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS + 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5> + 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5> + 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0> + 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5> + 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS + 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7> + 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0> + 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5> + 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0> + 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS + 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3> + 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0> + 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5> + 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5> + 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7> + 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7> + 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS + 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0> + 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS + 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3> + 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3> + 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5> + 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6> + 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6> + 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5> + 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS + 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2> + 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1> + 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4> + 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 
<4,3,5,0>, <7,3,0,1> + 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0> + 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0> + 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4> + 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1> + 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0> + 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS + 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5> + 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5> + 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1> + 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS + 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7> + 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0> + 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0> + 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2> + 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0> + 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5> + 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5> + 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7> + 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0> + 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS + 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1> + 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2> + 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3> + 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5> + 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5> + 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6> + 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7> + 
2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5> + 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS + 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3> + 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2> + 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0> + 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5> + 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3> + 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7> + 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0> + 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0> + 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2> + 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1> + 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2> + 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3> + 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6> + 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5> + 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7> + 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1> + 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2> + 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1> + 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5> + 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5> + 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0> + 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4> + 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS + 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1> + 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1> + 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1> + 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1> + 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1> + 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, 
<5,2,1,3> + 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0> + 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5> + 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5> + 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7> + 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0> + 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1> + 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS + 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1> + 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2> + 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3> + 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4> + 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7> + 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6> + 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1> + 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u> + 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2> + 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1> + 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2> + 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1> + 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6> + 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5> + 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6> + 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7> + 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2> + 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1> + 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5> + 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0> + 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5> + 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS + 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, 
<u,6,3,7> + 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1> + 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS + 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0> + 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS + 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1> + 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS + 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5> + 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5> + 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1> + 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7> + 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS + 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS + 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2> + 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2> + 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS + 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS + 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5> + 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6> + 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7> + 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS + 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS + 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2> + 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2> + 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3> + 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5> + 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3> + 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6> + 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7> + 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3> + 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1> + 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1> + 2644625102U, // <5,2,3,2>: 
Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5> + 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS + 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5> + 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5> + 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6> + 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5> + 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5> + 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5> + 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2> + 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5> + 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5> + 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5> + 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS + 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2> + 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2> + 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5> + 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1> + 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2> + 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2> + 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS + 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5> + 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5> + 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7> + 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3> + 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS + 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS + 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6> + 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2> + 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS + 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS + 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7> + 2248951738U, // <5,2,6,6>: Cost 3 vmrghw 
<5,6,7,0>, <2,6,3,7> + 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7> + 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS + 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2> + 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3> + 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2> + 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS + 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5> + 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5> + 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7> + 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7> + 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS + 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1> + 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS + 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2> + 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS + 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5> + 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS + 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5> + 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7> + 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5> + 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0> + 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2> + 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0> + 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2> + 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1> + 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5> + 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7> + 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7> + 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2> + 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0> + 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1> + 
2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3> + 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3> + 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4> + 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1> + 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7> + 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7> + 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3> + 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0> + 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5> + 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2> + 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4> + 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6> + 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1> + 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3> + 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3> + 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4> + 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1> + 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5> + 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2> + 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3> + 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5> + 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5> + 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6> + 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7> + 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5> + 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5> + 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0> + 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3> + 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5> + 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5> + 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 
<0,4,1,5>, <3,4,5,6> + 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5> + 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7> + 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6> + 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0> + 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5> + 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3> + 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3> + 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4> + 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5> + 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7> + 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7> + 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u> + 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS + 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1> + 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6> + 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6> + 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS + 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7> + 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6> + 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7> + 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS + 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0> + 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7> + 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7> + 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3> + 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5> + 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5> + 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6> + 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7> + 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5> + 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, 
LHS + 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2> + 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u> + 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u> + 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS + 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6> + 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6> + 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7> + 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2> + 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4> + 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS + 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5> + 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4> + 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5> + 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5> + 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6> + 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7> + 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS + 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0> + 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4> + 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4> + 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4> + 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4> + 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0> + 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7> + 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7> + 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u> + 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS + 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5> + 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2> + 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5> + 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4> + 
2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3> + 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7> + 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7> + 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1> + 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2> + 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1> + 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4> + 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3> + 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4> + 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5> + 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7> + 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7> + 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4> + 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4> + 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4> + 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3> + 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4> + 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4> + 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5> + 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7> + 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4> + 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5> + 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS + 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5> + 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3> + 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2> + 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS + 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS + 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7> + 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 2248952722U, 
// <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1> + 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1> + 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2> + 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6> + 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4> + 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS + 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6> + 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5> + 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS + 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2> + 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1> + 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4> + 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5> + 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6> + 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5> + 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6> + 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5> + 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4> + 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS + 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u> + 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5> + 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5> + 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS + 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5> + 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4> + 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0> + 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS + 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2> + 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3> + 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 
<0,4,1,5>, <5,0,4,1> + 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5> + 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0> + 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7> + 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS + 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0> + 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0> + 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5> + 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5> + 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5> + 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6> + 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7> + 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0> + 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3> + 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2> + 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4> + 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3> + 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3> + 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7> + 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7> + 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4> + 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2> + 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5> + 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3> + 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3> + 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6> + 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5> + 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6> + 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7> + 2713963294U, // <5,5,3,u>: Cost 3 
vsldoi8 <4,u,5,5>, <3,u,1,2> + 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1> + 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5> + 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5> + 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3> + 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5> + 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS + 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6> + 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7> + 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5> + 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS + 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1> + 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3> + 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3> + 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS + 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS + 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6> + 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7> + 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS + 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1> + 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1> + 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3> + 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6> + 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5> + 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7> + 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1> + 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2> + 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1> + 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5> + 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3> + 2713965926U, 
// <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6> + 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5> + 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6> + 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7> + 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2> + 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS + 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS + 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0> + 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1> + 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS + 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS + 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1> + 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS + 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0> + 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS + 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2> + 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4> + 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5> + 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6> + 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7> + 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS + 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS + 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2> + 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1> + 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0> + 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4> + 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6> + 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6> + 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6> + 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS + 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS + 3773113789U, // 
<5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2> + 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1> + 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2> + 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1> + 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6> + 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6> + 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7> + 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS + 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6> + 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2> + 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5> + 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2> + 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3> + 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6> + 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7> + 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1> + 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS + 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6> + 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS + 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3> + 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5> + 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6> + 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6> + 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS + 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6> + 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS + 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS + 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1> + 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1> + 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3> + 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4> + 2717290420U, // <5,6,5,4>: Cost 3 
vsldoi8 <5,4,5,6>, <5,4,5,6> + 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6> + 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1> + 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS + 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS + 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS + 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1> + 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3> + 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3> + 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS + 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6> + 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6> + 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS + 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS + 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS + 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1> + 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS + 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5> + 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3> + 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7> + 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS + 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS + 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS + 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS + 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS + 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3> + 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS + 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS + 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0> + 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 3718711395U, // 
<5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5> + 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3> + 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5> + 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5> + 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0> + 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2> + 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0> + 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1> + 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0> + 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3> + 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4> + 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1> + 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7> + 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7> + 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7> + 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0> + 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5> + 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2> + 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3> + 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4> + 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7> + 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7> + 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7> + 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3> + 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2> + 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5> + 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4> + 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3> + 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5> + 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5> + 3780422309U, // 
<5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7> + 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4> + 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5> + 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5> + 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0> + 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5> + 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7> + 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5> + 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS + 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7> + 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7> + 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS + 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0> + 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5> + 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3> + 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3> + 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4> + 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5> + 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7> + 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7> + 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u> + 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2> + 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1> + 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3> + 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6> + 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6> + 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7> + 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6> + 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7> + 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2> + 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS + 3724739827U, // <5,7,7,1>: 
Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7> + 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5> + 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3> + 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5> + 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5> + 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7> + 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7> + 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5> + 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0> + 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3> + 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3> + 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4> + 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS + 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u> + 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7> + 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS + 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0> + 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS + 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2> + 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS + 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1> + 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5> + 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0> + 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS + 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS + 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS + 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS + 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u> + 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0> + 2694743284U, // 
<5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u> + 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS + 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0> + 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS + 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2> + 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3> + 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u> + 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3> + 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7> + 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS + 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3> + 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1> + 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1> + 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5> + 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS + 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u> + 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5> + 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6> + 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS + 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u> + 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u> + 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5> + 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u> + 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5> + 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u> + 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS + 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u> + 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS + 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS + 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS + 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS + 
2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3> + 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS + 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS + 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS + 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS + 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS + 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS + 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS + 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6> + 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS + 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS + 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS + 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS + 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS + 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS + 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1> + 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS + 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5> + 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3> + 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2> + 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS + 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS + 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS + 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS + 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS + 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS + 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS + 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS + 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS + 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS + 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, 
<0,0,0,0> + 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1> + 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6> + 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3> + 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1> + 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0> + 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6> + 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7> + 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1> + 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS + 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS + 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6> + 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS + 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1> + 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1> + 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2> + 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0> + 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS + 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6> + 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6> + 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5> + 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6> + 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2> + 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2> + 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS + 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0> + 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4> + 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0> + 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4> + 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6> + 3724857382U, // 
<6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3> + 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7> + 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6> + 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4> + 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0> + 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5> + 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6> + 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6> + 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5> + 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0> + 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2> + 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7> + 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5> + 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0> + 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS + 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3> + 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0> + 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5> + 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6> + 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1> + 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7> + 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS + 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0> + 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS + 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3> + 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5> + 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5> + 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6> + 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0> + 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1> + 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS + 1235664896U, // <6,0,7,0>: Cost 
2 vmrglw RHS, <0,0,0,0> + 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1> + 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2> + 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7> + 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS + 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5> + 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0> + 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u> + 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0> + 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1> + 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u> + 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS + 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS + 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u> + 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0> + 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS + 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2> + 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6> + 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5> + 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5> + 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6> + 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7> + 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS + 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1> + 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1> + 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6> + 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3> + 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5> + 3373146450U, // <6,1,1,5>: Cost 4 vmrglw 
<2,u,6,1>, <0,4,1,5> + 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6> + 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7> + 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1> + 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS + 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1> + 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0> + 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0> + 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS + 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3> + 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7> + 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0> + 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1> + 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2> + 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1> + 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3> + 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3> + 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6> + 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5> + 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7> + 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1> + 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2> + 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1> + 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5> + 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5> + 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4> + 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5> + 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6> + 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6> + 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4> + 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6> + 2714668660U, // <6,1,5,0>: 
Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1> + 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6> + 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6> + 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4> + 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6> + 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6> + 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0> + 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7> + 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1> + 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS + 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1> + 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0> + 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7> + 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS + 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5> + 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6> + 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1> + 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0> + 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS + 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1> + 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2> + 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3> + 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS + 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5> + 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6> + 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7> + 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u> + 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS + 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1> + 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2> + 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0> + 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS + 1235673426U, // <6,1,u,5>: Cost 2 vmrglw 
RHS, <0,4,1,5> + 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6> + 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7> + 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u> + 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0> + 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS + 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6> + 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS + 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS + 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1> + 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1> + 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0> + 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS + 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0> + 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1> + 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2> + 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS + 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS + 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2> + 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0> + 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7> + 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS + 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS + 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2> + 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2> + 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3> + 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS + 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6> + 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7> + 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0> + 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3> + 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1> + 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 
<1,4,5,6>, <2,3,1,4> + 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6> + 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS + 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5> + 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6> + 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7> + 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4> + 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6> + 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS + 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1> + 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5> + 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS + 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS + 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6> + 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5> + 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0> + 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6> + 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS + 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2> + 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6> + 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6> + 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS + 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2> + 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6> + 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2> + 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6> + 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS + 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6> + 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2> + 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS + 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS + 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6> + 
2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7> + 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2> + 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS + 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS + 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1> + 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2> + 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS + 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS + 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5> + 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6> + 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7> + 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS + 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS + 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1> + 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2> + 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS + 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS + 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6> + 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6> + 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7> + 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS + 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0> + 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS + 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6> + 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6> + 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6> + 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0> + 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6> + 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7> + 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS + 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2> + 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1> + 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3> + 3373147762U, // 
<6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3> + 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5> + 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5> + 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6> + 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7> + 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2> + 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2> + 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1> + 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3> + 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3> + 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6> + 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3> + 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7> + 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7> + 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2> + 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS + 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6> + 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3> + 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3> + 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5> + 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6> + 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6> + 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7> + 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5> + 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS + 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2> + 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6> + 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6> + 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS + 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6> + 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6> + 3368527802U, // 
<6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7> + 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6> + 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0> + 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7> + 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3> + 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3> + 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6> + 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6> + 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6> + 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7> + 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3> + 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2> + 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1> + 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6> + 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3> + 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6> + 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6> + 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6> + 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7> + 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2> + 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS + 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1> + 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7> + 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3> + 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS + 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5> + 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0> + 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7> + 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS + 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS + 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1> + 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, 
<2,6,3,u> + 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3> + 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS + 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6> + 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0> + 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7> + 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS + 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0> + 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS + 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6> + 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1> + 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5> + 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS + 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0> + 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7> + 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS + 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1> + 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1> + 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3> + 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0> + 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2> + 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS + 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1> + 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4> + 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS + 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1> + 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4> + 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2> + 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6> + 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4> + 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS + 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2> + 3326037448U, // <6,4,2,7>: Cost 4 vmrghw 
<6,2,7,3>, <4,7,5,0> + 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS + 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2> + 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1> + 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4> + 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3> + 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6> + 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS + 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7> + 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6> + 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS + 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1> + 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1> + 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3> + 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4> + 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4> + 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6> + 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6> + 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7> + 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6> + 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS + 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5> + 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7> + 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6> + 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS + 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5> + 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2> + 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1> + 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1> + 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3> + 
3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5> + 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4> + 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS + 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6> + 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1> + 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS + 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS + 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4> + 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7> + 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7> + 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4> + 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5> + 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0> + 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7> + 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u> + 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS + 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4> + 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7> + 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u> + 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4> + 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5> + 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7> + 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0> + 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS + 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6> + 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6> + 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5> + 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5> + 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6> + 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7> + 2702090909U, // 
<6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS + 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1> + 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1> + 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0> + 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4> + 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1> + 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6> + 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1> + 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4> + 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5> + 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS + 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1> + 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2> + 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1> + 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2> + 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5> + 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7> + 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2> + 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5> + 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2> + 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5> + 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3> + 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3> + 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6> + 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5> + 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6> + 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1> + 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6> + 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS + 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1> + 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3> + 3719260674U, // 
<6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6> + 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4> + 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS + 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6> + 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4> + 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS + 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS + 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5> + 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3> + 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3> + 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5> + 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5> + 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6> + 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7> + 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5> + 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1> + 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1> + 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2> + 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4> + 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5> + 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5> + 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7> + 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6> + 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1> + 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS + 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1> + 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2> + 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3> + 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS + 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5> + 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6> + 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7> + 
1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u> + 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS + 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1> + 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2> + 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3> + 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS + 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5> + 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6> + 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7> + 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u> + 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0> + 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS + 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3> + 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1> + 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5> + 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0> + 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1> + 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS + 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS + 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1> + 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1> + 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0> + 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3> + 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5> + 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5> + 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6> + 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS + 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS + 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1> + 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1> + 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1> + 2252296817U, // <6,6,2,4>: 
Cost 3 vmrghw <6,2,7,3>, <6,4,2,5> + 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3> + 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2> + 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3> + 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2> + 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1> + 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2> + 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3> + 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5> + 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5> + 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6> + 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS + 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS + 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1> + 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1> + 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3> + 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3> + 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4> + 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS + 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6> + 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS + 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS + 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS + 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1> + 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3> + 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3> + 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS + 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5> + 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0> + 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS + 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS + 1583808614U, // 
<6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS + 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1> + 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3> + 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3> + 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS + 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5> + 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS + 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS + 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS + 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0> + 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1> + 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2> + 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3> + 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4> + 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5> + 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6> + 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS + 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS + 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS + 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS + 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3> + 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS + 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS + 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS + 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS + 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS + 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0> + 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS + 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2> + 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0> + 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5> + 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0> + 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7> + 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, 
<7,0,1,2> + 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS + 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1> + 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1> + 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0> + 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7> + 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5> + 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1> + 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7> + 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7> + 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0> + 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2> + 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1> + 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2> + 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1> + 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5> + 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6> + 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7> + 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7> + 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1> + 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2> + 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1> + 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3> + 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3> + 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6> + 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5> + 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7> + 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1> + 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2> + 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1> + 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3> + 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2> + 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0> + 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4> + 564579638U, // <6,7,4,5>: Cost 
1 vsldoi8 RHS, RHS + 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6> + 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7> + 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS + 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2> + 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1> + 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4> + 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3> + 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6> + 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5> + 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0> + 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7> + 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0> + 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2> + 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1> + 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3> + 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5> + 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6> + 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1> + 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6> + 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1> + 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3> + 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0> + 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1> + 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7> + 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3> + 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4> + 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5> + 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7> + 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7> + 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u> + 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2> + 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS + 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0> + 1638324156U, // <6,7,u,3>: 
Cost 2 vsldoi8 RHS, <u,3,0,1> + 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6> + 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS + 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7> + 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7> + 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS + 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0> + 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS + 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2> + 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS + 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5> + 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0> + 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7> + 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS + 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS + 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1> + 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1> + 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS + 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5> + 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1> + 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7> + 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS + 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS + 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2> + 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS + 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2> + 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1> + 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5> + 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS + 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7> + 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1> + 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1> + 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2> + 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 
RHS, <3,1,1,1> + 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u> + 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3> + 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6> + 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5> + 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7> + 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS + 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2> + 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1> + 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3> + 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2> + 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS + 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4> + 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS + 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2> + 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS + 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS + 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2> + 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1> + 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4> + 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS + 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6> + 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5> + 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS + 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS + 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS + 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS + 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3> + 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS + 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS + 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS + 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS + 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1> + 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS + 
1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS + 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1> + 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7> + 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS + 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS + 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5> + 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6> + 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS + 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS + 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS + 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS + 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u> + 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS + 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS + 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS + 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS + 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS + 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS + 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0> + 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1> + 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7> + 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3> + 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1> + 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0> + 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0> + 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7> + 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1> + 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS + 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1> + 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS + 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1> + 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS + 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5> + 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1> + 
2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0> + 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS + 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3> + 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6> + 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2> + 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1> + 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6> + 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3> + 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7> + 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3> + 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6> + 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2> + 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0> + 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0> + 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3> + 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6> + 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7> + 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0> + 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0> + 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0> + 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1> + 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5> + 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7> + 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4> + 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5> + 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS + 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4> + 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4> + 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5> + 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1> + 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS + 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 
<5,6,7,0>, <5,2,1,3> + 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0> + 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5> + 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5> + 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0> + 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0> + 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0> + 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0> + 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7> + 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3> + 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0> + 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7> + 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7> + 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6> + 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1> + 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7> + 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2> + 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS + 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3> + 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6> + 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6> + 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7> + 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7> + 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0> + 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS + 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS + 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1> + 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS + 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u> + 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS + 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS + 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u> + 2718718208U, // <7,0,u,7>: Cost 3 
vsldoi8 <5,6,7,0>, <u,7,0,1> + 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS + 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1> + 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1> + 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1> + 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0> + 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS + 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5> + 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6> + 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2> + 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1> + 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1> + 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2> + 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1> + 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5> + 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5> + 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1> + 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1> + 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1> + 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3> + 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3> + 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0> + 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5> + 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3> + 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2> + 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0> + 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0> + 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1> + 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1> + 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2> + 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0> + 
3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0> + 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5> + 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7> + 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7> + 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u> + 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1> + 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5> + 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5> + 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0> + 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS + 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6> + 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5> + 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6> + 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5> + 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1> + 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6> + 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6> + 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0> + 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5> + 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5> + 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1> + 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1> + 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1> + 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1> + 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7> + 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7> + 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7> + 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7> + 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7> + 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7> + 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0> + 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 
<1,6,u,7>, <1,6,u,7> + 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS + 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1> + 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2> + 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3> + 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS + 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5> + 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6> + 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7> + 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u> + 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1> + 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1> + 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0> + 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5> + 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7> + 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u> + 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0> + 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0> + 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS + 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2> + 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1> + 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS + 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS + 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1> + 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1> + 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1> + 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS + 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1> + 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1> + 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0> + 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1> + 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5> + 3840747062U, 
// <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6> + 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0> + 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0> + 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1> + 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1> + 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1> + 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2> + 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3> + 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5> + 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2> + 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6> + 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2> + 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3> + 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1> + 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1> + 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6> + 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS + 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5> + 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5> + 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3> + 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7> + 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1> + 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS + 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5> + 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5> + 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS + 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS + 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6> + 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5> + 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7> + 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS + 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1> + 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 
RHS, <2,5,1,2> + 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2> + 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6> + 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5> + 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5> + 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2> + 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6> + 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6> + 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS + 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2> + 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7> + 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7> + 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS + 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7> + 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6> + 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7> + 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7> + 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1> + 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6> + 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2> + 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS + 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5> + 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5> + 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7> + 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7> + 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS + 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1> + 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1> + 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2> + 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7> + 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5> + 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5> + 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 
<2,6,3,7>, <2,u,6,1> + 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0> + 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7> + 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0> + 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2> + 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0> + 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1> + 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1> + 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5> + 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0> + 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7> + 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2> + 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0> + 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1> + 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3> + 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0> + 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0> + 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0> + 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1> + 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1> + 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3> + 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3> + 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2> + 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2> + 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0> + 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3> + 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3> + 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3> + 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3> + 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0> + 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0> + 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1> + 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3> + 
1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3> + 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4> + 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3> + 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3> + 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7> + 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3> + 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1> + 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2> + 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4> + 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5> + 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4> + 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6> + 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2> + 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7> + 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6> + 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1> + 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1> + 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1> + 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3> + 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5> + 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5> + 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3> + 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6> + 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3> + 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7> + 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7> + 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3> + 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7> + 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7> + 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7> + 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2> + 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, 
<3,6,7,7> + 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3> + 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1> + 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1> + 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6> + 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2> + 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5> + 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5> + 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7> + 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7> + 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1> + 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7> + 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2> + 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3> + 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3> + 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7> + 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6> + 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3> + 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3> + 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2> + 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0> + 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS + 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1> + 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1> + 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4> + 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1> + 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0> + 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0> + 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1> + 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1> + 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1> + 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3> + 3852913616U, // <7,4,1,3>: Cost 4 
vsldoi12 RHS, <4,1,3,0> + 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2> + 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0> + 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7> + 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7> + 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0> + 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3> + 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3> + 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2> + 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1> + 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3> + 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2> + 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7> + 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3> + 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3> + 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2> + 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2> + 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3> + 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3> + 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6> + 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0> + 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4> + 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4> + 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0> + 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1> + 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1> + 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2> + 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3> + 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4> + 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5> + 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4> + 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7> + 1707420917U, // <7,4,4,u>: Cost 2 
vsldoi12 RHS, <4,4,u,5> + 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS + 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1> + 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7> + 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5> + 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS + 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0> + 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS + 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2> + 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS + 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7> + 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7> + 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3> + 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4> + 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4> + 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6> + 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6> + 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1> + 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7> + 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2> + 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2> + 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3> + 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6> + 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1> + 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS + 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7> + 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4> + 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS + 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS + 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS + 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7> + 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u> + 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 
<6,7,4,u>, RHS + 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1> + 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS + 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1> + 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS + 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS + 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2> + 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3> + 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4> + 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1> + 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5> + 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6> + 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2> + 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1> + 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1> + 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1> + 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3> + 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5> + 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5> + 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0> + 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5> + 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5> + 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1> + 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1> + 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3> + 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2> + 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4> + 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5> + 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3> + 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3> + 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3> + 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3> + 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1> + 2319403922U, // <7,5,3,1>: Cost 
3 vmrglw <6,2,7,3>, <4,0,5,1> + 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2> + 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3> + 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5> + 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5> + 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6> + 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4> + 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u> + 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS + 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1> + 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3> + 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4> + 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4> + 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6> + 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6> + 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6> + 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6> + 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1> + 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1> + 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2> + 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3> + 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5> + 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6> + 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7> + 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1> + 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7> + 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2> + 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4> + 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5> + 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7> + 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7> + 
1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0> + 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0> + 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS + 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1> + 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2> + 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2> + 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7> + 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5> + 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6> + 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7> + 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS + 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1> + 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2> + 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2> + 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4> + 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4> + 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3> + 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0> + 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0> + 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0> + 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2> + 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3> + 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1> + 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1> + 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5> + 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6> + 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS + 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS + 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1> + 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1> + 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3> + 3833672072U, // <7,6,1,3>: Cost 5 
vsldoi12 <1,3,6,7>, <6,1,3,6> + 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5> + 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5> + 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6> + 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1> + 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1> + 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS + 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3> + 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3> + 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2> + 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS + 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3> + 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2> + 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3> + 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3> + 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS + 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6> + 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2> + 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3> + 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5> + 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5> + 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6> + 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS + 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS + 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1> + 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1> + 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3> + 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4> + 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4> + 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6> + 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6> + 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS + 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS + 
3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1> + 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4> + 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6> + 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6> + 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5> + 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5> + 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6> + 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1> + 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1> + 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS + 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1> + 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3> + 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6> + 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS + 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6> + 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6> + 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7> + 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7> + 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1> + 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1> + 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2> + 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0> + 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5> + 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5> + 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2> + 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS + 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1> + 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1> + 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2> + 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2> + 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0> + 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5> + 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, 
<6,u,5,6> + 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6> + 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3> + 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1> + 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0> + 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2> + 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1> + 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3> + 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1> + 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0> + 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0> + 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7> + 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2> + 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1> + 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1> + 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3> + 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3> + 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1> + 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0> + 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1> + 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7> + 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3> + 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3> + 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3> + 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2> + 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0> + 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3> + 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3> + 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3> + 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3> + 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3> + 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1> + 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1> + 
3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6> + 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3> + 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5> + 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5> + 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7> + 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7> + 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3> + 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5> + 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2> + 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7> + 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3> + 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4> + 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6> + 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4> + 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7> + 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6> + 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1> + 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0> + 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3> + 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3> + 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5> + 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5> + 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7> + 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7> + 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7> + 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS + 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7> + 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7> + 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3> + 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS + 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7> + 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7> + 2779174420U, // 
<7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0> + 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7> + 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS + 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1> + 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3> + 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3> + 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS + 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5> + 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7> + 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS + 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS + 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0> + 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2> + 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3> + 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3> + 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4> + 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6> + 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7> + 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS + 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS + 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0> + 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2> + 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2> + 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS + 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1> + 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS + 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6> + 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS + 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2> + 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS + 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1> + 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS + 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1> + 1584581942U, // <7,u,1,4>: Cost 2 
vsldoi4 <6,7,u,1>, RHS + 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0> + 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1> + 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1> + 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS + 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1> + 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3> + 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2> + 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0> + 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3> + 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3> + 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3> + 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3> + 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0> + 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1> + 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1> + 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6> + 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS + 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5> + 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5> + 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u> + 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS + 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1> + 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1> + 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS + 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2> + 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS + 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4> + 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6> + 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6> + 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS + 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6> + 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS + 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 
RHS, <u,5,1,0> + 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7> + 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5> + 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS + 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5> + 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS + 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1> + 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS + 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1> + 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7> + 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u> + 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7> + 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5> + 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7> + 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u> + 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7> + 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7> + 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1> + 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS + 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6> + 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS + 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5> + 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS + 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2> + 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS + 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS + 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1> + 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2> + 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS + 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0> + 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5> + 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6> + 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS + 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS + 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 
RHS, LHS + 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS + 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1> + 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0> + 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3> + 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS + 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0> + 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0> + 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7> + 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS + 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS + 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS + 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS + 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2> + 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS + 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1> + 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1> + 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2> + 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS + 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0> + 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS + 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2> + 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1> + 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5> + 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5> + 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7> + 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0> + 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS + 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0> + 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1> + 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0> + 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3> + 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4> + 2664386658U, // <u,0,3,5>: Cost 3 
vsldoi4 <7,u,0,3>, <5,6,7,0> + 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7> + 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3> + 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u> + 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0> + 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5> + 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4> + 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4> + 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5> + 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS + 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4> + 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0> + 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5> + 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0> + 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS + 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2> + 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5> + 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5> + 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5> + 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0> + 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5> + 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS + 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0> + 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS + 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3> + 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0> + 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5> + 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6> + 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6> + 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1> + 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS + 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0> + 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, 
<2,3,0,1> + 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0> + 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7> + 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS + 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5> + 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0> + 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u> + 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS + 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS + 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS + 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1> + 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS + 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS + 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u> + 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7> + 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS + 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1> + 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS + 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2> + 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0> + 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS + 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5> + 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1> + 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0> + 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1> + 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1> + 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS + 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0> + 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2> + 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS + 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5> + 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1> + 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, 
<u,6,1,7> + 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS + 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS + 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3> + 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0> + 835584U, // <u,1,2,3>: Cost 0 copy LHS + 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS + 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3> + 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7> + 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2> + 835584U, // <u,1,2,u>: Cost 0 copy LHS + 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0> + 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1> + 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2> + 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3> + 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4> + 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5> + 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6> + 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7> + 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u> + 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1> + 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1> + 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2> + 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4> + 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS + 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS + 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1> + 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4> + 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS + 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5> + 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1> + 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0> + 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2> + 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS + 1210728786U, // <u,1,5,5>: Cost 
2 vmrglw <0,4,1,5>, <0,4,1,5> + 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0> + 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2> + 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS + 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6> + 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7> + 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2> + 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2> + 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS + 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7> + 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6> + 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1> + 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1> + 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS + 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1> + 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2> + 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3> + 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS + 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5> + 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6> + 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7> + 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u> + 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u> + 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS + 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2> + 835584U, // <u,1,u,3>: Cost 0 copy LHS + 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS + 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5> + 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6> + 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u> + 835584U, // <u,1,u,u>: Cost 0 copy LHS + 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0> + 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS + 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2> + 
1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS + 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5> + 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5> + 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1> + 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0> + 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS + 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2> + 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1> + 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2> + 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS + 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS + 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6> + 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1> + 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS + 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS + 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2> + 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS + 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3> + 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS + 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5> + 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7> + 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7> + 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS + 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS + 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2> + 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS + 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS + 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5> + 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3> + 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS + 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, 
<4,0,u,2> + 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5> + 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2> + 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS + 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4> + 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS + 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5> + 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0> + 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS + 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS + 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5> + 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2> + 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS + 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS + 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5> + 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7> + 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1> + 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS + 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS + 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2> + 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2> + 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7> + 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS + 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5> + 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7> + 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2> + 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7> + 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2> + 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1> + 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2> + 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS + 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS + 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5> + 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, 
<2,3,2,6> + 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7> + 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS + 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS + 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1> + 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS + 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS + 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS + 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS + 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7> + 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2> + 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS + 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0> + 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS + 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2> + 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0> + 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5> + 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7> + 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0> + 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7> + 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS + 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2> + 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1> + 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0> + 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3> + 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6> + 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1> + 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7> + 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7> + 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0> + 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0> + 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1> + 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2> + 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1> + 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5> + 2685634453U, // <u,3,2,5>: Cost 
3 vsldoi8 LHS, <2,5,u,6> + 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7> + 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7> + 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1> + 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0> + 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1> + 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3> + 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS + 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4> + 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5> + 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6> + 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7> + 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS + 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS + 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4> + 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2> + 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3> + 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS + 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS + 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4> + 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7> + 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS + 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2> + 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1> + 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5> + 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3> + 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6> + 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5> + 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0> + 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7> + 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0> + 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS + 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1> + 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3> + 2634770973U, 
// <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6> + 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS + 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1> + 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6> + 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1> + 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3> + 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS + 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1> + 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7> + 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3> + 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS + 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5> + 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6> + 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7> + 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS + 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2> + 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS + 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3> + 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS + 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6> + 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS + 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7> + 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7> + 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS + 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1> + 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS + 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0> + 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4> + 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5> + 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1> + 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0> + 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0> + 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS + 1189464978U, // <u,4,1,0>: Cost 2 vmrghw 
LHS, <4,0,5,1> + 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1> + 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4> + 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1> + 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4> + 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS + 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2> + 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1> + 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS + 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1> + 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2> + 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2> + 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1> + 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4> + 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS + 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7> + 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4> + 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS + 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2> + 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1> + 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4> + 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3> + 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4> + 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5> + 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7> + 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3> + 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u> + 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS + 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4> + 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4> + 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3> + 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS + 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS + 2658718662U, // 
<u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4> + 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7> + 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS + 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS + 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5> + 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5> + 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2> + 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS + 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS + 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS + 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2> + 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS + 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1> + 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1> + 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3> + 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4> + 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4> + 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS + 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6> + 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1> + 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS + 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS + 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4> + 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4> + 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7> + 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4> + 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5> + 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4> + 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7> + 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u> + 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS + 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS + 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u> + 
2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1> + 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS + 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS + 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS + 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7> + 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS + 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0> + 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS + 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5> + 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0> + 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5> + 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5> + 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6> + 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0> + 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5> + 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS + 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1> + 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0> + 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2> + 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1> + 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5> + 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0> + 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2> + 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS + 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS + 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5> + 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2> + 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5> + 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5> + 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3> + 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7> + 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5> + 1628498055U, // <u,5,2,u>: 
Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5> + 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0> + 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1> + 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5> + 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3> + 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4> + 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5> + 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6> + 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7> + 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u> + 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS + 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1> + 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5> + 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4> + 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5> + 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS + 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6> + 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4> + 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS + 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS + 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1> + 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2> + 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3> + 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5> + 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS + 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0> + 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7> + 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS + 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS + 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6> + 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6> + 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6> + 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS + 2781171799U, // 
<u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7> + 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6> + 27705344U, // <u,5,6,7>: Cost 0 copy RHS + 27705344U, // <u,5,6,u>: Cost 0 copy RHS + 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS + 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1> + 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2> + 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3> + 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS + 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5> + 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6> + 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7> + 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u> + 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS + 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS + 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0> + 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u> + 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS + 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS + 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6> + 27705344U, // <u,5,u,7>: Cost 0 copy RHS + 27705344U, // <u,5,u,u>: Cost 0 copy RHS + 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0> + 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS + 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3> + 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6> + 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5> + 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5> + 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6> + 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS + 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS + 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS + 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1> + 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3> + 2263208498U, // <u,6,1,3>: Cost 3 
vmrghw LHS, <6,3,4,5> + 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6> + 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1> + 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6> + 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS + 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS + 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS + 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6> + 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3> + 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6> + 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6> + 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6> + 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7> + 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS + 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS + 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2> + 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1> + 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2> + 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3> + 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6> + 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3> + 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6> + 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS + 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS + 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS + 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5> + 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u> + 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6> + 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6> + 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS + 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6> + 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS + 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS + 2634981478U, // 
<u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS + 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1> + 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3> + 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5> + 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS + 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5> + 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6> + 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS + 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS + 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS + 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1> + 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3> + 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6> + 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS + 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6> + 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS + 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS + 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS + 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS + 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1> + 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS + 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5> + 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6> + 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS + 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS + 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS + 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS + 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2> + 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2> + 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS + 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS + 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS + 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS + 135540023U, 
// <u,6,u,u>: Cost 1 vmrglw LHS, RHS + 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0> + 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS + 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2> + 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0> + 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5> + 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0> + 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7> + 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7> + 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS + 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2> + 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1> + 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0> + 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3> + 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6> + 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1> + 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7> + 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7> + 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0> + 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2> + 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1> + 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2> + 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1> + 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5> + 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6> + 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7> + 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7> + 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7> + 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2> + 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1> + 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u> + 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3> + 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6> + 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 
RHS, <3,5,5,5> + 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3> + 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7> + 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2> + 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1> + 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3> + 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2> + 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0> + 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4> + 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS + 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2> + 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0> + 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS + 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2> + 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1> + 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4> + 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3> + 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6> + 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5> + 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0> + 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7> + 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0> + 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2> + 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1> + 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3> + 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5> + 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6> + 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1> + 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6> + 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1> + 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1> + 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0> + 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1> + 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7> + 1235816954U, // <u,7,7,3>: Cost 2 
vmrglw RHS, <6,2,7,3> + 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4> + 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5> + 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7> + 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS + 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS + 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2> + 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS + 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0> + 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1> + 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6> + 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS + 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7> + 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS + 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS + 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS + 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS + 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2> + 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS + 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5> + 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0> + 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6> + 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS + 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS + 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1> + 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS + 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS + 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS + 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS + 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS + 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1> + 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS + 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS + 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS + 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS + 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS + 
835584U, // <u,u,2,3>: Cost 0 copy LHS + 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS + 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS + 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7> + 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS + 835584U, // <u,u,2,u>: Cost 0 copy LHS + 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS + 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1> + 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2> + 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS + 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS + 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5> + 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6> + 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS + 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS + 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS + 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4> + 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2> + 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS + 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS + 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS + 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6> + 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS + 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS + 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5> + 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS + 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5> + 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS + 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS + 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS + 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS + 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS + 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS + 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS + 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS + 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 
RHS, <6,2,7,3> + 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7> + 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS + 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS + 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS + 27705344U, // <u,u,6,7>: Cost 0 copy RHS + 27705344U, // <u,u,6,u>: Cost 0 copy RHS + 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS + 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1> + 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7> + 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS + 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS + 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5> + 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6> + 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS + 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS + 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS + 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS + 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS + 835584U, // <u,u,u,3>: Cost 0 copy LHS + 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS + 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS + 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS + 27705344U, // <u,u,u,7>: Cost 0 copy RHS + 835584U, // <u,u,u,u>: Cost 0 copy LHS + 0 +}; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp new file mode 100644 index 0000000..2e90b7a --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -0,0 +1,645 @@ +//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reginfo" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCMachineFunctionInfo.h" +#include "PPCRegisterInfo.h" +#include "PPCFrameLowering.h" +#include "PPCSubtarget.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include <cstdlib> + +#define GET_REGINFO_TARGET_DESC +#include "PPCGenRegisterInfo.inc" + +// FIXME (64-bit): Eventually enable by default. +namespace llvm { +cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", + cl::init(false), + cl::desc("Enable PPC32 register scavenger"), + cl::Hidden); +cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", + cl::init(false), + cl::desc("Enable PPC64 register scavenger"), + cl::Hidden); +} + +using namespace llvm; + +// FIXME (64-bit): Should be inlined. +bool +PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { + return ((EnablePPC32RS && !Subtarget.isPPC64()) || + (EnablePPC64RS && Subtarget.isPPC64())); +} + +PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, + const TargetInstrInfo &tii) + : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, + ST.isPPC64() ? 
0 : 1, + ST.isPPC64() ? 0 : 1), + Subtarget(ST), TII(tii) { + ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; + ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; + ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; + ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX; + ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX; + ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX; + ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX; + ImmToIdxMap[PPC::ADDI] = PPC::ADD4; + + // 64-bit + ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8; + ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8; + ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; + ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; + ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; +} + +/// getPointerRegClass - Return the register class to use to hold pointers. +/// This is used for addressing modes. +const TargetRegisterClass * +PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { + if (Subtarget.isPPC64()) + return &PPC::G8RCRegClass; + return &PPC::GPRCRegClass; +} + +const unsigned* +PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + // 32-bit Darwin calling convention. 
+ static const unsigned Darwin32_CalleeSavedRegs[] = { + PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + PPC::LR, 0 + }; + + // 32-bit SVR4 calling convention. + static const unsigned SVR4_CalleeSavedRegs[] = { + PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + + PPC::VRSAVE, + + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + 0 + }; + // 64-bit Darwin calling convention. 
+ static const unsigned Darwin64_CalleeSavedRegs[] = { + PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + PPC::LR8, 0 + }; + + // 64-bit SVR4 calling convention. + static const unsigned SVR4_64_CalleeSavedRegs[] = { + PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + + PPC::VRSAVE, + + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + 0 + }; + + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : + Darwin32_CalleeSavedRegs; + + return Subtarget.isPPC64() ? 
SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; +} + +BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const PPCFrameLowering *PPCFI = + static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + + Reserved.set(PPC::R0); + Reserved.set(PPC::R1); + Reserved.set(PPC::LR); + Reserved.set(PPC::LR8); + Reserved.set(PPC::RM); + + // The SVR4 ABI reserves r2 and r13 + if (Subtarget.isSVR4ABI()) { + Reserved.set(PPC::R2); // System-reserved register + Reserved.set(PPC::R13); // Small Data Area pointer register + } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::R2); + } + + // On PPC64, r13 is the thread pointer. Never allocate this register. + // Note that this is over conservative, as it also prevents allocation of R31 + // when the FP is not needed. + if (Subtarget.isPPC64()) { + Reserved.set(PPC::R13); + Reserved.set(PPC::R31); + + if (!requiresRegisterScavenging(MF)) + Reserved.set(PPC::R0); // FIXME (64-bit): Remove + + Reserved.set(PPC::X0); + Reserved.set(PPC::X1); + Reserved.set(PPC::X13); + Reserved.set(PPC::X31); + + // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. + if (Subtarget.isSVR4ABI()) { + Reserved.set(PPC::X2); + } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. 
+ if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::X2); + } + } + + if (PPCFI->needsFP(MF)) + Reserved.set(PPC::R31); + + return Reserved; +} + +//===----------------------------------------------------------------------===// +// Stack Frame Processing methods +//===----------------------------------------------------------------------===// + +void PPCRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { + // Add (actually subtract) back the amount the callee popped on return. + if (int CalleeAmt = I->getOperand(1).getImm()) { + bool is64Bit = Subtarget.isPPC64(); + CalleeAmt *= -1; + unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; + unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + + if (isInt<16>(CalleeAmt)) { + BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg). + addImm(CalleeAmt); + } else { + MachineBasicBlock::iterator MBBI = I; + BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + .addImm(CalleeAmt >> 16); + BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, RegState::Kill) + .addImm(CalleeAmt & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) + .addReg(StackReg) + .addReg(StackReg) + .addReg(TmpReg); + } + } + } + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered +/// register first and then a spilled callee-saved register if that fails. 
+static +unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, + const TargetRegisterClass *RC, int SPAdj) { + assert(RS && "Register scavenging must be on"); + unsigned Reg = RS->FindUnusedReg(RC); + // FIXME: move ARM callee-saved reg scan to target independent code, then + // search for already spilled CS register here. + if (Reg == 0) + Reg = RS->scavengeRegister(RC, II, SPAdj); + return Reg; +} + +/// lowerDynamicAlloc - Generate the code for allocating an object in the +/// current frame. The sequence of code with be in the general form +/// +/// addi R0, SP, \#frameSize ; get the address of the previous frame +/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size +/// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation +/// +void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + // Get the basic block's function. + MachineFunction &MF = *MBB.getParent(); + // Get the frame info. + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Determine whether 64-bit pointers are used. + bool LP64 = Subtarget.isPPC64(); + DebugLoc dl = MI.getDebugLoc(); + + // Get the maximum call stack size. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + // Get the total frame size. + unsigned FrameSize = MFI->getStackSize(); + + // Get stack alignments. + unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + if (MaxAlign > TargetAlign) + report_fatal_error("Dynamic alloca with large aligns not supported"); + + // Determine the previous frame's address. If FrameSize can't be + // represented as 16 bits or we need special alignment, then we load the + // previous frame's address from 0(SP). Why not do an addis of the hi? 
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero. + // Constructing the constant and adding would take 3 instructions. + // Fortunately, a frame greater than 32K is rare. + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; + + // FIXME (64-bit): Use "findScratchRegister" + unsigned Reg; + if (requiresRegisterScavenging(MF)) + Reg = findScratchRegister(II, RS, RC, SPAdj); + else + Reg = PPC::R0; + + if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) + .addReg(PPC::R31) + .addImm(FrameSize); + } else if (LP64) { + if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); + else + BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) + .addImm(0) + .addReg(PPC::X1); + } else { + BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) + .addImm(0) + .addReg(PPC::R1); + } + + // Grow the stack and update the stack pointer link, then determine the + // address of new allocated space. + if (LP64) { + if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. + BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); + else + BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + .addReg(PPC::X0, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); + + if (!MI.getOperand(1).isKill()) + BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) + .addReg(PPC::X1) + .addImm(maxCallFrameSize); + else + // Implicitly kill the register. 
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) + .addReg(PPC::X1) + .addImm(maxCallFrameSize) + .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + } else { + BuildMI(MBB, II, dl, TII.get(PPC::STWUX)) + .addReg(Reg, RegState::Kill) + .addReg(PPC::R1) + .addReg(MI.getOperand(1).getReg()); + + if (!MI.getOperand(1).isKill()) + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) + .addReg(PPC::R1) + .addImm(maxCallFrameSize); + else + // Implicitly kill the register. + BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg()) + .addReg(PPC::R1) + .addImm(maxCallFrameSize) + .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); + } + + // Discard the DYNALLOC instruction. + MBB.erase(II); +} + +/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of +/// reserving a whole register (R0), we scrounge for one here. This generates +/// code like this: +/// +/// mfcr rA ; Move the conditional register into GPR rA. +/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot. +/// stw rA, FI ; Store rA to the frame. +/// +void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + unsigned Reg = findScratchRegister(II, RS, RC, SPAdj); + unsigned SrcReg = MI.getOperand(0).getReg(); + bool LP64 = Subtarget.isPPC64(); + + // We need to store the CR in the low 4-bits of the saved value. First, issue + // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg. 
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + // If the saved register wasn't CR0, shift the bits left so that they are in + // CR0's slot. + if (SrcReg != PPC::CR0) + // rlwinm rA, rA, ShiftBits, 0, 31. + BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) + .addReg(Reg, RegState::Kill) + .addImm(getPPCRegisterNumbering(SrcReg) * 4) + .addImm(0) + .addImm(31); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) + .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void +PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + + // Get the instruction. + MachineInstr &MI = *II; + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + // Get the basic block's function. + MachineFunction &MF = *MBB.getParent(); + // Get the frame info. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + DebugLoc dl = MI.getDebugLoc(); + + // Find out which operand is the frame index. + unsigned FIOperandNo = 0; + while (!MI.getOperand(FIOperandNo).isFI()) { + ++FIOperandNo; + assert(FIOperandNo != MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + // Take into account whether it's an add or mem instruction + unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2; + if (MI.isInlineAsm()) + OffsetOperandNo = FIOperandNo-1; + + // Get the frame index. + int FrameIndex = MI.getOperand(FIOperandNo).getIndex(); + + // Get the frame pointer save index. Users of this index are primarily + // DYNALLOC instructions. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + int FPSI = FI->getFramePointerSaveIndex(); + // Get the instruction opcode. 
+ unsigned OpC = MI.getOpcode(); + + // Special case for dynamic alloca. + if (FPSI && FrameIndex == FPSI && + (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { + lowerDynamicAlloc(II, SPAdj, RS); + return; + } + + // Special case for pseudo-op SPILL_CR. + if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default. + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex, SPAdj, RS); + return; + } + + // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). + MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? + PPC::R31 : PPC::R1, + false); + + // Figure out if the offset in the instruction is shifted right two bits. This + // is true for instructions like "STD", which the machine implicitly adds two + // low zeros to. + bool isIXAddr = false; + switch (OpC) { + case PPC::LWA: + case PPC::LD: + case PPC::STD: + case PPC::STD_32: + isIXAddr = true; + break; + } + + // Now add the frame object offset to the offset from r1. + int Offset = MFI->getObjectOffset(FrameIndex); + if (!isIXAddr) + Offset += MI.getOperand(OffsetOperandNo).getImm(); + else + Offset += MI.getOperand(OffsetOperandNo).getImm() << 2; + + // If we're not using a Frame Pointer that has been set to the value of the + // SP before having the stack size subtracted from it, then add the stack size + // to Offset to get the correct offset. + // Naked functions have stack size 0, although getStackSize may not reflect that + // because we didn't call all the pieces that compute it for naked functions. + if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + Offset += MFI->getStackSize(); + + // If we can, encode the offset directly into the instruction. If this is a + // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If + // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits + // clear can be encoded. 
This is extremely uncommon, because normally you + // only "std" to a stack slot that is at least 4-byte aligned, but it can + // happen in invalid code. + if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + if (isIXAddr) + Offset >>= 2; // The actual encoded value has the low two bits zero. + MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); + return; + } + + // The offset doesn't fit into a single register, scavenge one to build the + // offset in. + // FIXME: figure out what SPAdj is doing here. + + // FIXME (64-bit): Use "findScratchRegister". + unsigned SReg; + if (requiresRegisterScavenging(MF)) + SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj); + else + SReg = PPC::R0; + + // Insert a set of rA with the full offset value before the ld, st, or add + BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg) + .addImm(Offset >> 16); + BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg) + .addReg(SReg, RegState::Kill) + .addImm(Offset); + + // Convert into indexed form of the instruction: + // + // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0 + // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 + unsigned OperandBase; + + if (OpC != TargetOpcode::INLINEASM) { + assert(ImmToIdxMap.count(OpC) && + "No indexed form of load or store available!"); + unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; + MI.setDesc(TII.get(NewOpcode)); + OperandBase = 1; + } else { + OperandBase = OffsetOperandNo; + } + + unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); + MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); + MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); +} + +unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!Subtarget.isPPC64()) + return TFI->hasFP(MF) ? PPC::R31 : PPC::R1; + else + return TFI->hasFP(MF) ? 
PPC::X31 : PPC::X1; +} + +unsigned PPCRegisterInfo::getEHExceptionRegister() const { + return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3; +} + +unsigned PPCRegisterInfo::getEHHandlerRegister() const { + return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h new file mode 100644 index 0000000..1cc7213 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -0,0 +1,70 @@ +//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC32_REGISTERINFO_H +#define POWERPC32_REGISTERINFO_H + +#include "PPC.h" +#include <map> + +#define GET_REGINFO_HEADER +#include "PPCGenRegisterInfo.inc" + +namespace llvm { +class PPCSubtarget; +class TargetInstrInfo; +class Type; + +class PPCRegisterInfo : public PPCGenRegisterInfo { + std::map<unsigned, unsigned> ImmToIdxMap; + const PPCSubtarget &Subtarget; + const TargetInstrInfo &TII; +public: + PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); + + /// getPointerRegClass - Return the register class to use to hold pointers. + /// This is used for addressing modes. + virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + /// requiresRegisterScavenging - We require a register scavenger. + /// FIXME (64-bit): Should be inlined. 
+ bool requiresRegisterScavenging(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void lowerDynamicAlloc(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const; + void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + // Debug information queries. + unsigned getFrameRegister(const MachineFunction &MF) const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td new file mode 100644 index 0000000..1acdf4e --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -0,0 +1,326 @@ +//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +let Namespace = "PPC" in { +def sub_lt : SubRegIndex; +def sub_gt : SubRegIndex; +def sub_eq : SubRegIndex; +def sub_un : SubRegIndex; +def sub_32 : SubRegIndex; +} + + +class PPCReg<string n> : Register<n> { + let Namespace = "PPC"; +} + +// We identify all our registers with a 5-bit ID, for consistency's sake. 
+ +// GPR - One of the 32 32-bit general-purpose registers +class GPR<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + +// GP8 - One of the 32 64-bit general-purpose registers +class GP8<GPR SubReg, string n> : PPCReg<n> { + field bits<5> Num = SubReg.Num; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_32]; +} + +// SPR - One of the 32-bit special-purpose registers +class SPR<bits<10> num, string n> : PPCReg<n> { + field bits<10> Num = num; +} + +// FPR - One of the 32 64-bit floating-point registers +class FPR<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + +// VR - One of the 32 128-bit vector registers +class VR<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + +// CR - One of the 8 4-bit condition registers +class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { + field bits<3> Num = num; + let SubRegs = subregs; +} + +// CRBIT - One of the 32 1-bit condition register fields +class CRBIT<bits<5> num, string n> : PPCReg<n> { + field bits<5> Num = num; +} + + +// General-purpose registers +def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>; +def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>; +def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>; +def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>; +def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>; +def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>; +def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>; +def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>; +def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>; +def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>; +def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>; +def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>; +def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>; +def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>; +def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>; +def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>; +def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>; +def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>; +def R18 : GPR<18, "r18">, 
DwarfRegNum<[-2, 18]>; +def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>; +def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>; +def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>; +def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>; +def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>; +def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>; +def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>; +def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>; +def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>; +def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>; +def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>; +def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>; +def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>; + +// 64-bit General-purpose registers +def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>; +def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>; +def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>; +def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>; +def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>; +def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>; +def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>; +def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>; +def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>; +def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>; +def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>; +def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>; +def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>; +def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>; +def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>; +def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>; +def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>; +def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>; +def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>; +def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>; +def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>; +def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>; +def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>; +def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>; +def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>; +def X25 : GP8<R25, "r25">, 
DwarfRegNum<[25, -2]>; +def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>; +def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>; +def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>; +def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>; +def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>; +def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>; + +// Floating-point registers +def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>; +def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>; +def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>; +def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>; +def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>; +def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>; +def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>; +def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>; +def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>; +def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>; +def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>; +def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>; +def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>; +def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>; +def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>; +def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>; +def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>; +def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>; +def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>; +def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>; +def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>; +def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>; +def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>; +def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>; +def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>; +def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>; +def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>; +def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>; +def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>; +def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>; +def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>; +def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>; + +// Vector registers +def V0 : VR< 0, "v0">, 
DwarfRegNum<[77, 77]>; +def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>; +def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>; +def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>; +def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>; +def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>; +def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>; +def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>; +def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>; +def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>; +def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>; +def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>; +def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>; +def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>; +def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>; +def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>; +def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>; +def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>; +def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>; +def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>; +def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>; +def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>; +def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>; +def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>; +def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>; +def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>; +def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>; +def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>; +def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>; +def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>; +def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>; +def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>; + +// Condition register bits +def CR0LT : CRBIT< 0, "0">; +def CR0GT : CRBIT< 1, "1">; +def CR0EQ : CRBIT< 2, "2">; +def CR0UN : CRBIT< 3, "3">; +def CR1LT : CRBIT< 4, "4">; +def CR1GT : CRBIT< 5, "5">; +def CR1EQ : CRBIT< 6, "6">; +def CR1UN : CRBIT< 7, "7">; +def CR2LT : CRBIT< 8, "8">; +def CR2GT : CRBIT< 9, "9">; +def CR2EQ : CRBIT<10, "10">; +def CR2UN : CRBIT<11, "11">; +def CR3LT : CRBIT<12, "12">; +def CR3GT : CRBIT<13, "13">; +def CR3EQ : 
CRBIT<14, "14">; +def CR3UN : CRBIT<15, "15">; +def CR4LT : CRBIT<16, "16">; +def CR4GT : CRBIT<17, "17">; +def CR4EQ : CRBIT<18, "18">; +def CR4UN : CRBIT<19, "19">; +def CR5LT : CRBIT<20, "20">; +def CR5GT : CRBIT<21, "21">; +def CR5EQ : CRBIT<22, "22">; +def CR5UN : CRBIT<23, "23">; +def CR6LT : CRBIT<24, "24">; +def CR6GT : CRBIT<25, "25">; +def CR6EQ : CRBIT<26, "26">; +def CR6UN : CRBIT<27, "27">; +def CR7LT : CRBIT<28, "28">; +def CR7GT : CRBIT<29, "29">; +def CR7EQ : CRBIT<30, "30">; +def CR7UN : CRBIT<31, "31">; + +// Condition registers +let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in { +def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>; +def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>; +def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>; +def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>; +def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>; +def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>; +def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>; +def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>; +} + +// Link register +def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>; +//let Aliases = [LR] in +def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>; + +// Count register +def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>; +def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>; + +// VRsave register +def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[109]>; + +// Carry bit. In the architecture this is really bit 0 of the XER register +// (which really is SPR register 1); this is the only bit interesting to a +// compiler. +def CARRY: SPR<1, "ca">; + +// FP rounding mode: bits 30 and 31 of the FP status and control register +// This is not allocated as a normal register; it appears only in +// Uses and Defs. 
The ABI says it needs to be preserved by a function, +// but this is not achieved by saving and restoring it as with +// most registers; it has to be done in code. To make this work all the +// return and call instructions are described as Uses of RM, so instructions +// that do nothing but change RM will not get deleted. +// Also, in the architecture it is not really an SPR; 512 is arbitrary. +def RM: SPR<512, "**ROUNDING MODE**">; + +/// Register classes +// Allocate volatiles first +// then nonvolatiles in reverse order since stmw/lmw save from rN to r31 +def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), + (sequence "R%u", 30, 13), + R31, R0, R1, LR)>; + +def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), + (sequence "X%u", 30, 14), + X31, X13, X0, X1, LR8)>; + +// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 +// ABI the size of the Floating-point register save area is determined by the +// allocated non-volatile register with the lowest register number, as FP +// register N is spilled to offset 8 * (32 - N) below the back chain word of the +// previous stack frame. By allocating non-volatiles in reverse order we make +// sure that the Floating-point register save area is always as small as +// possible because there aren't any unused spill slots. 
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13), + (sequence "F%u", 31, 14))>; +def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; + +def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, + (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11, + V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, + V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; + +def CRBITRC : RegisterClass<"PPC", [i32], 32, + (add CR0LT, CR0GT, CR0EQ, CR0UN, + CR1LT, CR1GT, CR1EQ, CR1UN, + CR2LT, CR2GT, CR2EQ, CR2UN, + CR3LT, CR3GT, CR3EQ, CR3UN, + CR4LT, CR4GT, CR4EQ, CR4UN, + CR5LT, CR5GT, CR5EQ, CR5UN, + CR6LT, CR6GT, CR6EQ, CR6UN, + CR7LT, CR7GT, CR7EQ, CR7UN)> +{ + let CopyCost = -1; +} + +def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, + CR7, CR2, CR3, CR4)> { + let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)]; +} + +def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>; +def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>; +def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; +def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> { + let CopyCost = -1; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h new file mode 100644 index 0000000..a33e7e0 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h @@ -0,0 +1,56 @@ +//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC 32-bit target-specific relocation types. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PPC32RELOCATIONS_H +#define PPC32RELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +// Hack to rid us of a PPC pre-processor symbol which is erroneously +// defined in a PowerPC header file (bug in Linux/PPC) +#ifdef PPC +#undef PPC +#endif + +namespace llvm { + namespace PPC { + enum RelocationType { + // reloc_vanilla - A standard relocation, where the address of the + // relocated object completely overwrites the address of the relocation. + reloc_vanilla, + + // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions. + reloc_pcrel_bx, + + // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE, + // and other bcx instructions. + reloc_pcrel_bcx, + + // reloc_absolute_high - Absolute relocation, for the loadhi instruction + // (which is really addis). Add the high 16-bits of the specified global + // address into the low 16-bits of the instruction. + reloc_absolute_high, + + // reloc_absolute_low - Absolute relocation, for the la instruction (which + // is really an addi). Add the low 16-bits of the specified global + // address into the low 16-bits of the instruction. + reloc_absolute_low, + + // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store + // instruction which have two implicit zero bits. + reloc_absolute_low_ix + }; + } +} + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td new file mode 100644 index 0000000..9664f14 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -0,0 +1,505 @@ +//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across PowerPC chips sets +// +def BPU : FuncUnit; // Branch unit +def SLU : FuncUnit; // Store/load unit +def SRU : FuncUnit; // special register unit +def IU1 : FuncUnit; // integer unit 1 (simple) +def IU2 : FuncUnit; // integer unit 2 (complex) +def FPU1 : FuncUnit; // floating point unit 1 +def FPU2 : FuncUnit; // floating point unit 2 +def VPU : FuncUnit; // vector permutation unit +def VIU1 : FuncUnit; // vector integer unit 1 (simple) +def VIU2 : FuncUnit; // vector integer unit 2 (complex) +def VFPU : FuncUnit; // vector floating point unit + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for PowerPC +// +def IntGeneral : InstrItinClass; +def IntCompare : InstrItinClass; +def IntDivD : InstrItinClass; +def IntDivW : InstrItinClass; +def IntMFFS : InstrItinClass; +def IntMFVSCR : InstrItinClass; +def IntMTFSB0 : InstrItinClass; +def IntMTSRD : InstrItinClass; +def IntMulHD : InstrItinClass; +def IntMulHW : InstrItinClass; +def IntMulHWU : InstrItinClass; +def IntMulLI : InstrItinClass; +def IntRFID : InstrItinClass; +def IntRotateD : InstrItinClass; +def IntRotate : InstrItinClass; +def IntShift : InstrItinClass; +def IntTrapD : InstrItinClass; +def IntTrapW : InstrItinClass; +def BrB : InstrItinClass; +def BrCR : InstrItinClass; +def BrMCR : InstrItinClass; +def BrMCRX : InstrItinClass; +def LdStDCBA : InstrItinClass; +def LdStDCBF : InstrItinClass; +def LdStDCBI : InstrItinClass; +def LdStGeneral : InstrItinClass; +def LdStDSS : InstrItinClass; +def LdStICBI : InstrItinClass; +def LdStUX : InstrItinClass; +def LdStLD : InstrItinClass; +def LdStLDARX : InstrItinClass; +def LdStLFD : InstrItinClass; +def LdStLFDU : InstrItinClass; +def LdStLHA : InstrItinClass; +def LdStLMW : InstrItinClass; +def 
LdStLVecX : InstrItinClass; +def LdStLWA : InstrItinClass; +def LdStLWARX : InstrItinClass; +def LdStSLBIA : InstrItinClass; +def LdStSLBIE : InstrItinClass; +def LdStSTD : InstrItinClass; +def LdStSTDCX : InstrItinClass; +def LdStSTVEBX : InstrItinClass; +def LdStSTWCX : InstrItinClass; +def LdStSync : InstrItinClass; +def SprISYNC : InstrItinClass; +def SprMFSR : InstrItinClass; +def SprMTMSR : InstrItinClass; +def SprMTSR : InstrItinClass; +def SprTLBSYNC : InstrItinClass; +def SprMFCR : InstrItinClass; +def SprMFMSR : InstrItinClass; +def SprMFSPR : InstrItinClass; +def SprMFTB : InstrItinClass; +def SprMTSPR : InstrItinClass; +def SprMTSRIN : InstrItinClass; +def SprRFI : InstrItinClass; +def SprSC : InstrItinClass; +def FPGeneral : InstrItinClass; +def FPCompare : InstrItinClass; +def FPDivD : InstrItinClass; +def FPDivS : InstrItinClass; +def FPFused : InstrItinClass; +def FPRes : InstrItinClass; +def FPSqrt : InstrItinClass; +def VecGeneral : InstrItinClass; +def VecFP : InstrItinClass; +def VecFPCompare : InstrItinClass; +def VecComplex : InstrItinClass; +def VecPerm : InstrItinClass; +def VecFPRound : InstrItinClass; +def VecVSL : InstrItinClass; +def VecVSR : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. + +include "PPCScheduleG3.td" +include "PPCScheduleG4.td" +include "PPCScheduleG4Plus.td" +include "PPCScheduleG5.td" + +//===----------------------------------------------------------------------===// +// Instruction to itinerary class map - When adding new opcodes to the supported +// set, refer to the following table to determine which itinerary class the +// opcode belongs to. +// +// opcode itinerary class +// ====== =============== +// add IntGeneral +// addc IntGeneral +// adde IntGeneral +// addi IntGeneral +// addic IntGeneral +// addic. IntGeneral +// addis IntGeneral +// addme IntGeneral +// addze IntGeneral +// and IntGeneral +// andc IntGeneral +// andi. 
IntGeneral +// andis. IntGeneral +// b BrB +// bc BrB +// bcctr BrB +// bclr BrB +// cmp IntCompare +// cmpi IntCompare +// cmpl IntCompare +// cmpli IntCompare +// cntlzd IntRotateD +// cntlzw IntGeneral +// crand BrCR +// crandc BrCR +// creqv BrCR +// crnand BrCR +// crnor BrCR +// cror BrCR +// crorc BrCR +// crxor BrCR +// dcba LdStDCBA +// dcbf LdStDCBF +// dcbi LdStDCBI +// dcbst LdStDCBF +// dcbt LdStGeneral +// dcbtst LdStGeneral +// dcbz LdStDCBF +// divd IntDivD +// divdu IntDivD +// divw IntDivW +// divwu IntDivW +// dss LdStDSS +// dst LdStDSS +// dstst LdStDSS +// eciwx LdStGeneral +// ecowx LdStGeneral +// eieio LdStGeneral +// eqv IntGeneral +// extsb IntGeneral +// extsh IntGeneral +// extsw IntRotateD +// fabs FPGeneral +// fadd FPGeneral +// fadds FPGeneral +// fcfid FPGeneral +// fcmpo FPCompare +// fcmpu FPCompare +// fctid FPGeneral +// fctidz FPGeneral +// fctiw FPGeneral +// fctiwz FPGeneral +// fdiv FPDivD +// fdivs FPDivS +// fmadd FPFused +// fmadds FPGeneral +// fmr FPGeneral +// fmsub FPFused +// fmsubs FPGeneral +// fmul FPFused +// fmuls FPGeneral +// fnabs FPGeneral +// fneg FPGeneral +// fnmadd FPFused +// fnmadds FPGeneral +// fnmsub FPFused +// fnmsubs FPGeneral +// fres FPRes +// frsp FPGeneral +// frsqrte FPGeneral +// fsel FPGeneral +// fsqrt FPSqrt +// fsqrts FPSqrt +// fsub FPGeneral +// fsubs FPGeneral +// icbi LdStICBI +// isync SprISYNC +// lbz LdStGeneral +// lbzu LdStGeneral +// lbzux LdStUX +// lbzx LdStGeneral +// ld LdStLD +// ldarx LdStLDARX +// ldu LdStLD +// ldux LdStLD +// ldx LdStLD +// lfd LdStLFD +// lfdu LdStLFDU +// lfdux LdStLFDU +// lfdx LdStLFDU +// lfs LdStLFDU +// lfsu LdStLFDU +// lfsux LdStLFDU +// lfsx LdStLFDU +// lha LdStLHA +// lhau LdStLHA +// lhaux LdStLHA +// lhax LdStLHA +// lhbrx LdStGeneral +// lhz LdStGeneral +// lhzu LdStGeneral +// lhzux LdStUX +// lhzx LdStGeneral +// lmw LdStLMW +// lswi LdStLMW +// lswx LdStLMW +// lvebx LdStLVecX +// lvehx LdStLVecX +// lvewx LdStLVecX +// lvsl 
LdStLVecX +// lvsr LdStLVecX +// lvx LdStLVecX +// lvxl LdStLVecX +// lwa LdStLWA +// lwarx LdStLWARX +// lwaux LdStLHA +// lwax LdStLHA +// lwbrx LdStGeneral +// lwz LdStGeneral +// lwzu LdStGeneral +// lwzux LdStUX +// lwzx LdStGeneral +// mcrf BrMCR +// mcrfs FPGeneral +// mcrxr BrMCRX +// mfcr SprMFCR +// mffs IntMFFS +// mfmsr SprMFMSR +// mfspr SprMFSPR +// mfsr SprMFSR +// mfsrin SprMFSR +// mftb SprMFTB +// mfvscr IntMFVSCR +// mtcrf BrMCRX +// mtfsb0 IntMTFSB0 +// mtfsb1 IntMTFSB0 +// mtfsf IntMTFSB0 +// mtfsfi IntMTFSB0 +// mtmsr SprMTMSR +// mtmsrd LdStLD +// mtspr SprMTSPR +// mtsr SprMTSR +// mtsrd IntMTSRD +// mtsrdin IntMTSRD +// mtsrin SprMTSRIN +// mtvscr IntMFVSCR +// mulhd IntMulHD +// mulhdu IntMulHD +// mulhw IntMulHW +// mulhwu IntMulHWU +// mulld IntMulHD +// mulli IntMulLI +// mullw IntMulHW +// nand IntGeneral +// neg IntGeneral +// nor IntGeneral +// or IntGeneral +// orc IntGeneral +// ori IntGeneral +// oris IntGeneral +// rfi SprRFI +// rfid IntRFID +// rldcl IntRotateD +// rldcr IntRotateD +// rldic IntRotateD +// rldicl IntRotateD +// rldicr IntRotateD +// rldimi IntRotateD +// rlwimi IntRotate +// rlwinm IntGeneral +// rlwnm IntGeneral +// sc SprSC +// slbia LdStSLBIA +// slbie LdStSLBIE +// sld IntRotateD +// slw IntGeneral +// srad IntRotateD +// sradi IntRotateD +// sraw IntShift +// srawi IntShift +// srd IntRotateD +// srw IntGeneral +// stb LdStGeneral +// stbu LdStGeneral +// stbux LdStGeneral +// stbx LdStGeneral +// std LdStSTD +// stdcx. 
LdStSTDCX +// stdu LdStSTD +// stdux LdStSTD +// stdx LdStSTD +// stfd LdStUX +// stfdu LdStUX +// stfdux LdStUX +// stfdx LdStUX +// stfiwx LdStUX +// stfs LdStUX +// stfsu LdStUX +// stfsux LdStUX +// stfsx LdStUX +// sth LdStGeneral +// sthbrx LdStGeneral +// sthu LdStGeneral +// sthux LdStGeneral +// sthx LdStGeneral +// stmw LdStLMW +// stswi LdStLMW +// stswx LdStLMW +// stvebx LdStSTVEBX +// stvehx LdStSTVEBX +// stvewx LdStSTVEBX +// stvx LdStSTVEBX +// stvxl LdStSTVEBX +// stw LdStGeneral +// stwbrx LdStGeneral +// stwcx. LdStSTWCX +// stwu LdStGeneral +// stwux LdStGeneral +// stwx LdStGeneral +// subf IntGeneral +// subfc IntGeneral +// subfe IntGeneral +// subfic IntGeneral +// subfme IntGeneral +// subfze IntGeneral +// sync LdStSync +// td IntTrapD +// tdi IntTrapD +// tlbia LdStSLBIA +// tlbie LdStDCBF +// tlbsync SprTLBSYNC +// tw IntTrapW +// twi IntTrapW +// vaddcuw VecGeneral +// vaddfp VecFP +// vaddsbs VecGeneral +// vaddshs VecGeneral +// vaddsws VecGeneral +// vaddubm VecGeneral +// vaddubs VecGeneral +// vadduhm VecGeneral +// vadduhs VecGeneral +// vadduwm VecGeneral +// vadduws VecGeneral +// vand VecGeneral +// vandc VecGeneral +// vavgsb VecGeneral +// vavgsh VecGeneral +// vavgsw VecGeneral +// vavgub VecGeneral +// vavguh VecGeneral +// vavguw VecGeneral +// vcfsx VecFP +// vcfux VecFP +// vcmpbfp VecFPCompare +// vcmpeqfp VecFPCompare +// vcmpequb VecGeneral +// vcmpequh VecGeneral +// vcmpequw VecGeneral +// vcmpgefp VecFPCompare +// vcmpgtfp VecFPCompare +// vcmpgtsb VecGeneral +// vcmpgtsh VecGeneral +// vcmpgtsw VecGeneral +// vcmpgtub VecGeneral +// vcmpgtuh VecGeneral +// vcmpgtuw VecGeneral +// vctsxs VecFP +// vctuxs VecFP +// vexptefp VecFP +// vlogefp VecFP +// vmaddfp VecFP +// vmaxfp VecFPCompare +// vmaxsb VecGeneral +// vmaxsh VecGeneral +// vmaxsw VecGeneral +// vmaxub VecGeneral +// vmaxuh VecGeneral +// vmaxuw VecGeneral +// vmhaddshs VecComplex +// vmhraddshs VecComplex +// vminfp VecFPCompare +// vminsb VecGeneral 
+// vminsh VecGeneral +// vminsw VecGeneral +// vminub VecGeneral +// vminuh VecGeneral +// vminuw VecGeneral +// vmladduhm VecComplex +// vmrghb VecPerm +// vmrghh VecPerm +// vmrghw VecPerm +// vmrglb VecPerm +// vmrglh VecPerm +// vmrglw VecPerm +// vmsubfp VecFP +// vmsummbm VecComplex +// vmsumshm VecComplex +// vmsumshs VecComplex +// vmsumubm VecComplex +// vmsumuhm VecComplex +// vmsumuhs VecComplex +// vmulesb VecComplex +// vmulesh VecComplex +// vmuleub VecComplex +// vmuleuh VecComplex +// vmulosb VecComplex +// vmulosh VecComplex +// vmuloub VecComplex +// vmulouh VecComplex +// vnor VecGeneral +// vor VecGeneral +// vperm VecPerm +// vpkpx VecPerm +// vpkshss VecPerm +// vpkshus VecPerm +// vpkswss VecPerm +// vpkswus VecPerm +// vpkuhum VecPerm +// vpkuhus VecPerm +// vpkuwum VecPerm +// vpkuwus VecPerm +// vrefp VecFPRound +// vrfim VecFPRound +// vrfin VecFPRound +// vrfip VecFPRound +// vrfiz VecFPRound +// vrlb VecGeneral +// vrlh VecGeneral +// vrlw VecGeneral +// vrsqrtefp VecFP +// vsel VecGeneral +// vsl VecVSL +// vslb VecGeneral +// vsldoi VecPerm +// vslh VecGeneral +// vslo VecPerm +// vslw VecGeneral +// vspltb VecPerm +// vsplth VecPerm +// vspltisb VecPerm +// vspltish VecPerm +// vspltisw VecPerm +// vspltw VecPerm +// vsr VecVSR +// vsrab VecGeneral +// vsrah VecGeneral +// vsraw VecGeneral +// vsrb VecGeneral +// vsrh VecGeneral +// vsro VecPerm +// vsrw VecGeneral +// vsubcuw VecGeneral +// vsubfp VecFP +// vsubsbs VecGeneral +// vsubshs VecGeneral +// vsubsws VecGeneral +// vsububm VecGeneral +// vsububs VecGeneral +// vsubuhm VecGeneral +// vsubuhs VecGeneral +// vsubuwm VecGeneral +// vsubuws VecGeneral +// vsum2sws VecComplex +// vsum4sbs VecComplex +// vsum4shs VecComplex +// vsum4ubs VecComplex +// vsumsws VecComplex +// vupkhpx VecPerm +// vupkhsb VecPerm +// vupkhsh VecPerm +// vupklpx VecPerm +// vupklsb VecPerm +// vupklsh VecPerm +// vxor VecGeneral +// xor IntGeneral +// xori IntGeneral +// xoris IntGeneral +// diff 
--git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td new file mode 100644 index 0000000..ad4da1f --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td @@ -0,0 +1,64 @@ +//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G3 (750) processor. +// +//===----------------------------------------------------------------------===// + + +def G3Itineraries : ProcessorItineraries< + [IU1, IU2, FPU1, BPU, SRU, SLU], [], [ + InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, + InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>, + InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>, + InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>, + InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>, + InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>, + InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<BrB , [InstrStage<1, [BPU]>]>, + InstrItinData<BrCR , [InstrStage<1, [SRU]>]>, + InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>, + InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, + InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, + 
InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, + InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>, + InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>, + InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>, + InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>, + InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>, + InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>, + InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>, + InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>, + InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>, + InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>, + InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>, + InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>, + InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>, + InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, + InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, + InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, + InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, + InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>, + InstrItinData<FPRes , [InstrStage<10, [FPU1]>]> +]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td new file mode 100644 index 0000000..03c3b29 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td @@ -0,0 +1,74 @@ +//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G4 (7400) processor. 
+// +//===----------------------------------------------------------------------===// + +def G4Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [ + InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, + InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>, + InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>, + InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>, + InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>, + InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>, + InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>, + InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<BrB , [InstrStage<1, [BPU]>]>, + InstrItinData<BrCR , [InstrStage<1, [SRU]>]>, + InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>, + InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, + InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, + InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>, + InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>, + InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>, + InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>, + 
InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>, + InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>, + InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>, + InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>, + InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>, + InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>, + InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>, + InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>, + InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, + InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, + InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, + InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, + InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>, + InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>, + InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>, + InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>, + InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>, + InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>, + InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>, + InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>, + InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]> +]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td new file mode 100644 index 0000000..00cac3c --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -0,0 +1,80 @@ +//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G4+ (7450) processor. 
+// +//===----------------------------------------------------------------------===// + +def IU3 : FuncUnit; // integer unit 3 (7450 simple) +def IU4 : FuncUnit; // integer unit 4 (7450 simple) + +def G4PlusItineraries : ProcessorItineraries< + [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [ + InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>, + InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>, + InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>, + InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>, + InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>, + InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>, + InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>, + InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<BrB , [InstrStage<1, [BPU]>]>, + InstrItinData<BrCR , [InstrStage<2, [IU2]>]>, + InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>, + InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>, + InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, + InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>, + InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>, + 
InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, + InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>, + InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>, + InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>, + InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>, + InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>, + InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>, + InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>, + InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>, + InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>, + InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>, + InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, + InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>, + InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>, + InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>, + InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>, + InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>, + InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>, + InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>, + InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>, + InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>, + InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>, + InstrItinData<VecVSR , [InstrStage<2, [VPU]>]> +]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td new file mode 100644 index 0000000..1671f22 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -0,0 +1,84 @@ +//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file 
is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G5 (970) processor. +// +//===----------------------------------------------------------------------===// + +def G5Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [ + InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>, + InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>, + InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>, + InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>, + InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>, + InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>, + InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>, + InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>, + InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>, + InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>, + InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>, + InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>, + InstrItinData<BrB , [InstrStage<1, [BPU]>]>, + InstrItinData<BrCR , [InstrStage<4, [BPU]>]>, + InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>, + InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, + InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, + InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, + InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>, + 
InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>, + InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>, + InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work + InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>, + InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>, + InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, + InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work + InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>, + InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>, + InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>, + InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>, + InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>, + InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>, + InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>, + InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>, + InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>, + InstrItinData<SprSC , [InstrStage<1, [IU2]>]>, + InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>, + InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>, + InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>, + InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>, + InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>, + InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>, + InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>, + InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>, + InstrItinData<VecPerm , [InstrStage<3, 
[VPU]>]>, + InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>, + InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>, + InstrItinData<VecVSR , [InstrStage<3, [VPU]>]> +]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp new file mode 100644 index 0000000..d4258b4 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -0,0 +1,23 @@ +//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPCSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "powerpc-selectiondag-info" +#include "PPCTargetMachine.h" +using namespace llvm; + +PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { +} + +PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h new file mode 100644 index 0000000..341b69c --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -0,0 +1,31 @@ +//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef POWERPCCSELECTIONDAGINFO_H +#define POWERPCCSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class PPCTargetMachine; + +class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM); + ~PPCSelectionDAGInfo(); +}; + +} + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp new file mode 100644 index 0000000..cf194de --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -0,0 +1,141 @@ +//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPC specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "PPCSubtarget.h" +#include "PPC.h" +#include "llvm/GlobalValue.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include <cstdlib> + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "PPCGenSubtargetInfo.inc" + +using namespace llvm; + +#if defined(__APPLE__) +#include <mach/mach.h> +#include <mach/mach_host.h> +#include <mach/host_info.h> +#include <mach/machine.h> + +/// GetCurrentPowerPCCPU - Returns the name of the current CPU. 
+static const char *GetCurrentPowerPCCPU() { + host_basic_info_data_t hostInfo; + mach_msg_type_number_t infoCount; + + infoCount = HOST_BASIC_INFO_COUNT; + host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, + &infoCount); + + if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic"; + + switch(hostInfo.cpu_subtype) { + case CPU_SUBTYPE_POWERPC_601: return "601"; + case CPU_SUBTYPE_POWERPC_602: return "602"; + case CPU_SUBTYPE_POWERPC_603: return "603"; + case CPU_SUBTYPE_POWERPC_603e: return "603e"; + case CPU_SUBTYPE_POWERPC_603ev: return "603ev"; + case CPU_SUBTYPE_POWERPC_604: return "604"; + case CPU_SUBTYPE_POWERPC_604e: return "604e"; + case CPU_SUBTYPE_POWERPC_620: return "620"; + case CPU_SUBTYPE_POWERPC_750: return "750"; + case CPU_SUBTYPE_POWERPC_7400: return "7400"; + case CPU_SUBTYPE_POWERPC_7450: return "7450"; + case CPU_SUBTYPE_POWERPC_970: return "970"; + default: ; + } + + return "generic"; +} +#endif + + +PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) + : PPCGenSubtargetInfo(TT, CPU, FS) + , StackAlignment(16) + , DarwinDirective(PPC::DIR_NONE) + , IsGigaProcessor(false) + , Has64BitSupport(false) + , Use64BitRegs(false) + , IsPPC64(is64Bit) + , HasAltivec(false) + , HasFSQRT(false) + , HasSTFIWX(false) + , HasLazyResolverStubs(false) + , IsJITCodeModel(false) + , TargetTriple(TT) { + + // Determine default and user specified characteristics + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; +#if defined(__APPLE__) + if (CPUName == "generic") + CPUName = GetCurrentPowerPCCPU(); +#endif + + // Parse features string. + ParseSubtargetFeatures(CPUName, FS); + + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUName); + + // If we are generating code for ppc64, verify that options make sense. + if (is64Bit) { + Has64BitSupport = true; + // Silently force 64-bit register use on ppc64. 
+ Use64BitRegs = true; + } + + // If the user requested use of 64-bit regs, but the cpu selected doesn't + // support it, ignore. + if (use64BitRegs() && !has64BitSupport()) + Use64BitRegs = false; + + // Set up darwin-specific properties. + if (isDarwin()) + HasLazyResolverStubs = true; +} + +/// SetJITMode - This is called to inform the subtarget info that we are +/// producing code for the JIT. +void PPCSubtarget::SetJITMode() { + // JIT mode doesn't want lazy resolver stubs, it knows exactly where + // everything is. This matters for PPC64, which codegens in PIC mode without + // stubs. + HasLazyResolverStubs = false; + + // Calls to external functions need to use indirect calls + IsJITCodeModel = true; +} + + +/// hasLazyResolverStub - Return true if accesses to the specified global have +/// to go through a dyld lazy resolution stub. This means that an extra load +/// is required to get the address of the global. +bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, + const TargetMachine &TM) const { + // We never have stubs if HasLazyResolverStubs=false or if in static mode. + if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static) + return false; + // If symbol visibility is hidden, the extra load is not needed if + // the symbol is definitely defined in the current translation unit. 
+ bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); + if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage()) + return false; + return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || + GV->hasCommonLinkage() || isDecl; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h new file mode 100644 index 0000000..e028de6 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -0,0 +1,151 @@ +//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PowerPC specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPCSUBTARGET_H +#define POWERPCSUBTARGET_H + +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/ADT/Triple.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "PPCGenSubtargetInfo.inc" + +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + +namespace llvm { +class StringRef; + +namespace PPC { + // -m directive values. + enum { + DIR_NONE, + DIR_32, + DIR_601, + DIR_602, + DIR_603, + DIR_7400, + DIR_750, + DIR_970, + DIR_64 + }; +} + +class GlobalValue; +class TargetMachine; + +class PPCSubtarget : public PPCGenSubtargetInfo { +protected: + /// stackAlignment - The minimum alignment known to hold of the stack frame on + /// entry to the function and which must be maintained by every function. + unsigned StackAlignment; + + /// Selected instruction itineraries (one entry per itinerary class.) + InstrItineraryData InstrItins; + + /// Which cpu directive was used. 
+ unsigned DarwinDirective; + + /// Used by the ISel to turn on optimizations for POWER4-derived architectures + bool IsGigaProcessor; + bool Has64BitSupport; + bool Use64BitRegs; + bool IsPPC64; + bool HasAltivec; + bool HasFSQRT; + bool HasSTFIWX; + bool HasLazyResolverStubs; + bool IsJITCodeModel; + + /// TargetTriple - What processor and OS we're targeting. + Triple TargetTriple; + +public: + /// This constructor initializes the data members to match that + /// of the specified triple. + /// + PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + /// SetJITMode - This is called to inform the subtarget info that we are + /// producing code for the JIT. + void SetJITMode(); + + /// getStackAlignment - Returns the minimum alignment known to hold of the + /// stack frame on entry to the function and which must be maintained by every + /// function for this subtarget. + unsigned getStackAlignment() const { return StackAlignment; } + + /// getDarwinDirective - Returns the -m directive specified for the cpu. + /// + unsigned getDarwinDirective() const { return DarwinDirective; } + + /// getInstrItineraryData - Return the instruction itineraries based on subtarget + /// selection. + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + + /// getTargetDataString - Return the pointer size and type alignment + /// properties of this subtarget. + const char *getTargetDataString() const { + // Note, the alignment values for f64 and i64 on ppc64 in Darwin + // documentation are wrong; these are correct (i.e. "what gcc does"). + return isPPC64() ? 
"E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" + : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; + } + + /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. + /// + bool isPPC64() const { return IsPPC64; } + + /// has64BitSupport - Return true if the selected CPU supports 64-bit + /// instructions, regardless of whether we are in 32-bit or 64-bit mode. + bool has64BitSupport() const { return Has64BitSupport; } + + /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit + /// registers in 32-bit mode when possible. This can only be true if + /// has64BitSupport() returns true. + bool use64BitRegs() const { return Use64BitRegs; } + + /// hasLazyResolverStub - Return true if accesses to the specified global have + /// to go through a dyld lazy resolution stub. This means that an extra load + /// is required to get the address of the global. + bool hasLazyResolverStub(const GlobalValue *GV, + const TargetMachine &TM) const; + + // isJITCodeModel - True if we're generating code for the JIT + bool isJITCodeModel() const { return IsJITCodeModel; } + + // Specific obvious features. + bool hasFSQRT() const { return HasFSQRT; } + bool hasSTFIWX() const { return HasSTFIWX; } + bool hasAltivec() const { return HasAltivec; } + bool isGigaProcessor() const { return IsGigaProcessor; } + + const Triple &getTargetTriple() const { return TargetTriple; } + + /// isDarwin - True if this is any darwin platform. 
+ bool isDarwin() const { return TargetTriple.isMacOSX(); } + + bool isDarwinABI() const { return isDarwin(); } + bool isSVR4ABI() const { return !isDarwin(); } + +}; +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp new file mode 100644 index 0000000..f5744b8 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -0,0 +1,94 @@ +//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Top-level implementation for the PowerPC target. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCTargetMachine.h" +#include "llvm/PassManager.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +extern "C" void LLVMInitializePowerPCTarget() { + // Register the targets + RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); + RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); +} + +PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Subtarget(TT, CPU, FS, is64Bit), + DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), + FrameLowering(Subtarget), JITInfo(*this, is64Bit), + TLInfo(*this), TSInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) { +} + +/// Override this for PowerPC. Tail merging happily breaks up instruction issue +/// groups, which typically degrades performance. 
+bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } + +PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) { +} + + +PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) { +} + + +//===----------------------------------------------------------------------===// +// Pass Pipeline Configuration +//===----------------------------------------------------------------------===// + +bool PPCTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Install an instruction selector. + PM.add(createPPCISelDag(*this)); + return false; +} + +bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Must run branch selection immediately preceding the asm printer. + PM.add(createPPCBranchSelectionPass()); + return false; +} + +bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + JITCodeEmitter &JCE) { + // FIXME: This should be moved to TargetJITInfo!! + if (Subtarget.isPPC64()) + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + DisableJumpTables = true; + + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho + // writing? + Subtarget.SetJITMode(); + + // Machine code emitter pass for PowerPC. 
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE)); + + return false; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h new file mode 100644 index 0000000..d06f084 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -0,0 +1,96 @@ +//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PowerPC specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef PPC_TARGETMACHINE_H +#define PPC_TARGETMACHINE_H + +#include "PPCFrameLowering.h" +#include "PPCSubtarget.h" +#include "PPCJITInfo.h" +#include "PPCInstrInfo.h" +#include "PPCISelLowering.h" +#include "PPCSelectionDAGInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { +class PassManager; +class GlobalValue; + +/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets. 
+/// +class PPCTargetMachine : public LLVMTargetMachine { + PPCSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + PPCInstrInfo InstrInfo; + PPCFrameLowering FrameLowering; + PPCJITInfo JITInfo; + PPCTargetLowering TLInfo; + PPCSelectionDAGInfo TSInfo; + InstrItineraryData InstrItins; + +public: + PPCTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); + + virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const PPCFrameLowering *getFrameLowering() const { + return &FrameLowering; + } + virtual PPCJITInfo *getJITInfo() { return &JITInfo; } + virtual const PPCTargetLowering *getTargetLowering() const { + return &TLInfo; + } + virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const PPCRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; + } + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + JITCodeEmitter &JCE); + virtual bool getEnableTailMergeDefault() const; +}; + +/// PPC32TargetMachine - PowerPC 32-bit target machine. +/// +class PPC32TargetMachine : public PPCTargetMachine { +public: + PPC32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; + +/// PPC64TargetMachine - PowerPC 64-bit target machine. 
+/// +class PPC64TargetMachine : public PPCTargetMachine { +public: + PPC64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp new file mode 100644 index 0000000..5dc8568 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -0,0 +1,23 @@ +//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::ThePPC32Target, llvm::ThePPC64Target; + +extern "C" void LLVMInitializePowerPCTargetInfo() { + RegisterTarget<Triple::ppc, /*HasJIT=*/true> + X(ThePPC32Target, "ppc32", "PowerPC 32"); + + RegisterTarget<Triple::ppc64, /*HasJIT=*/true> + Y(ThePPC64Target, "ppc64", "PowerPC 64"); +} |