Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp  1971
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp  408
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp  450
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h  71
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp  240
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp  425
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h  56
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp  83
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h  39
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp  360
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp  150
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h  100
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  275
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h  104
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp  383
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp  86
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h  76
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.h  106
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.td  424
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp  1565
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp  253
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp  237
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp  697
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h  35
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td  265
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp  207
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp  2344
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp  1780
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h  129
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp  433
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h  102
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp  4379
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp  11590
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h  902
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td  1235
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td  1215
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h  43
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td  1675
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td  172
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp  1828
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h  279
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td  4127
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td  1216
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td  447
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td  1785
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp  233
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp  437
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp  219
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp  230
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp  25
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h  205
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h  6591
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp  1041
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h  144
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td  363
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSchedule.td  126
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td  607
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td  171
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td  320
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td  380
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td  80
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td  96
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td  112
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td  129
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td  396
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td  405
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp  245
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h  302
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp  170
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp  156
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp  407
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h  84
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp  61
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h  35
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h  27
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp  413
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h  97
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp  188
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp  372
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp  1003
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp  26
81 files changed, 60643 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
new file mode 100644
index 0000000..220c70a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -0,0 +1,1971 @@
+//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static const MCPhysReg RRegs[32] = {
+ PPC::R0, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static const MCPhysReg RRegsNoR0[32] = {
+ PPC::ZERO,
+ PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static const MCPhysReg XRegs[32] = {
+ PPC::X0, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static const MCPhysReg XRegsNoX0[32] = {
+ PPC::ZERO8,
+ PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static const MCPhysReg FRegs[32] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+static const MCPhysReg VRegs[32] = {
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
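+// The 64 VSX registers: VSR0-VSR31 (the VSL* entries) overlap the scalar
+// floating-point registers, and VSR32-VSR63 (the VSH* entries) overlap the
+// Altivec vector registers.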
+static const MCPhysReg VSRegs[64] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+static const MCPhysReg VSFRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+static const MCPhysReg VSSRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+static unsigned QFRegs[32] = {
+ PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+ PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+ PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+ PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+ PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+ PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
+static const MCPhysReg CRBITRegs[32] = {
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+static const MCPhysReg CRRegs[8] = {
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+// Evaluate an expression containing condition register
+// or condition register field symbols. Returns a non-negative
+// value on success, or -1 on error.
+static int64_t
+EvaluateCRExpr(const MCExpr *E) {
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ return -1;
+
+ case MCExpr::Constant: {
+ int64_t Res = cast<MCConstantExpr>(E)->getValue();
+ return Res < 0 ? -1 : Res;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+ StringRef Name = SRE->getSymbol().getName();
+
+ if (Name == "lt") return 0;
+ if (Name == "gt") return 1;
+ if (Name == "eq") return 2;
+ if (Name == "so") return 3;
+ if (Name == "un") return 3;
+
+ if (Name == "cr0") return 0;
+ if (Name == "cr1") return 1;
+ if (Name == "cr2") return 2;
+ if (Name == "cr3") return 3;
+ if (Name == "cr4") return 4;
+ if (Name == "cr5") return 5;
+ if (Name == "cr6") return 6;
+ if (Name == "cr7") return 7;
+
+ return -1;
+ }
+
+ case MCExpr::Unary:
+ return -1;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ int64_t LHSVal = EvaluateCRExpr(BE->getLHS());
+ int64_t RHSVal = EvaluateCRExpr(BE->getRHS());
+ int64_t Res;
+
+ if (LHSVal < 0 || RHSVal < 0)
+ return -1;
+
+ switch (BE->getOpcode()) {
+ default: return -1;
+ case MCBinaryExpr::Add: Res = LHSVal + RHSVal; break;
+ case MCBinaryExpr::Mul: Res = LHSVal * RHSVal; break;
+ }
+
+ return Res < 0 ? -1 : Res;
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+namespace {
+
+struct PPCOperand;
+
+class PPCAsmParser : public MCTargetAsmParser {
+ const MCInstrInfo &MII;
+ bool IsPPC64;
+ bool IsDarwin;
+
+ void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
+
+ bool isPPC64() const { return IsPPC64; }
+ bool isDarwin() const { return IsDarwin; }
+
+ bool MatchRegisterName(const AsmToken &Tok,
+ unsigned &RegNo, int64_t &IntVal);
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+
+ const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
+ PPCMCExpr::VariantKind &Variant);
+ const MCExpr *FixupVariantKind(const MCExpr *E);
+ bool ParseExpression(const MCExpr *&EVal);
+ bool ParseDarwinExpression(const MCExpr *&EVal);
+
+ bool ParseOperand(OperandVector &Operands);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveTC(unsigned Size, SMLoc L);
+ bool ParseDirectiveMachine(SMLoc L);
+ bool ParseDarwinDirectiveMachine(SMLoc L);
+ bool ParseDirectiveAbiVersion(SMLoc L);
+ bool ParseDirectiveLocalEntry(SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "PPCGenAsmMatcher.inc"
+
+ /// }
+
+
+public:
+ PPCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI), MII(MII) {
+ // Check for 64-bit vs. 32-bit pointer mode.
+ Triple TheTriple(STI.getTargetTriple());
+ IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
+ IsDarwin = TheTriple.isMacOSX();
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken DirectiveID) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ const MCExpr *applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind,
+ MCContext &Ctx) override;
+};
+
+/// PPCOperand - Instances of this class represent a parsed PowerPC machine
+/// instruction.
+struct PPCOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ ContextImmediate,
+ Expression,
+ TLSRegister
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ bool IsPPC64;
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct ImmOp {
+ int64_t Val;
+ };
+
+ struct ExprOp {
+ const MCExpr *Val;
+ int64_t CRVal; // Cached result of EvaluateCRExpr(Val)
+ };
+
+ struct TLSRegOp {
+ const MCSymbolRefExpr *Sym;
+ };
+
+ union {
+ struct TokOp Tok;
+ struct ImmOp Imm;
+ struct ExprOp Expr;
+ struct TLSRegOp TLSReg;
+ };
+
+ PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ IsPPC64 = o.IsPPC64;
+ switch (Kind) {
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Immediate:
+ case ContextImmediate:
+ Imm = o.Imm;
+ break;
+ case Expression:
+ Expr = o.Expr;
+ break;
+ case TLSRegister:
+ TLSReg = o.TLSReg;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const override { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const override { return EndLoc; }
+
+ /// isPPC64 - True if this operand is for an instruction in 64-bit mode.
+ bool isPPC64() const { return IsPPC64; }
+
+ int64_t getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+ int64_t getImmS16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ if (Kind == Immediate)
+ return Imm.Val;
+ return static_cast<int16_t>(Imm.Val);
+ }
+ int64_t getImmU16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getExpr() const {
+ assert(Kind == Expression && "Invalid access!");
+ return Expr.Val;
+ }
+
+ int64_t getExprCRVal() const {
+ assert(Kind == Expression && "Invalid access!");
+ return Expr.CRVal;
+ }
+
+ const MCExpr *getTLSReg() const {
+ assert(Kind == TLSRegister && "Invalid access!");
+ return TLSReg.Sym;
+ }
+
+ unsigned getReg() const override {
+ assert(isRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getVSReg() const {
+ assert(isVSRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getCCReg() const {
+ assert(isCCRegNumber() && "Invalid access!");
+ return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
+ }
+
+ unsigned getCRBit() const {
+ assert(isCRBitNumber() && "Invalid access!");
+ return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
+ }
+
+ unsigned getCRBitMask() const {
+ assert(isCRBitMask() && "Invalid access!");
+ return 7 - countTrailingZeros<uint64_t>(Imm.Val);
+ }
+
+ bool isToken() const override { return Kind == Token; }
+ bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
+ bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
+ bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
+ bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
+ bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
+ bool isU6ImmX2() const { return Kind == Immediate &&
+ isUInt<6>(getImm()) &&
+ (getImm() & 1) == 0; }
+ bool isU7ImmX4() const { return Kind == Immediate &&
+ isUInt<7>(getImm()) &&
+ (getImm() & 3) == 0; }
+ bool isU8ImmX8() const { return Kind == Immediate &&
+ isUInt<8>(getImm()) &&
+ (getImm() & 7) == 0; }
+
+ bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); }
+ bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); }
+ bool isU16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isUInt<16>(getImmU16Context());
+ default:
+ return false;
+ }
+ }
+ bool isS16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<16>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
+ bool isS16ImmX4() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm()) &&
+ (getImm() & 3) == 0); }
+ bool isS17Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<17>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
+ bool isTLSReg() const { return Kind == TLSRegister; }
+ bool isDirectBr() const {
+ if (Kind == Expression)
+ return true;
+ if (Kind != Immediate)
+ return false;
+    // Operand must be word (4-byte) aligned and fit in a signed 26-bit field.
+ if ((getImm() & 3) != 0)
+ return false;
+ if (isInt<26>(getImm()))
+ return true;
+ if (!IsPPC64) {
+ // In 32-bit mode, large 32-bit quantities wrap around.
+ if (isUInt<32>(getImm()) && isInt<26>(static_cast<int32_t>(getImm())))
+ return true;
+ }
+ return false;
+ }
+ bool isCondBr() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm()) &&
+ (getImm() & 3) == 0); }
+ bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); }
+ bool isCCRegNumber() const { return (Kind == Expression
+ && isUInt<3>(getExprCRVal())) ||
+ (Kind == Immediate
+ && isUInt<3>(getImm())); }
+ bool isCRBitNumber() const { return (Kind == Expression
+ && isUInt<5>(getExprCRVal())) ||
+ (Kind == Immediate
+ && isUInt<5>(getImm())); }
+ bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
+ isPowerOf2_32(getImm()); }
+ bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("addRegOperands");
+ }
+
+ void addRegGPRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(RRegs[getReg()]));
+ }
+
+ void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[getReg()]));
+ }
+
+ void addRegG8RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(XRegs[getReg()]));
+ }
+
+ void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(XRegsNoX0[getReg()]));
+ }
+
+ void addRegGxRCOperands(MCInst &Inst, unsigned N) const {
+ if (isPPC64())
+ addRegG8RCOperands(Inst, N);
+ else
+ addRegGPRCOperands(Inst, N);
+ }
+
+ void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const {
+ if (isPPC64())
+ addRegG8RCNoX0Operands(Inst, N);
+ else
+ addRegGPRCNoR0Operands(Inst, N);
+ }
+
+ void addRegF4RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
+ }
+
+ void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
+ }
+
+ void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VRegs[getReg()]));
+ }
+
+ void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VSRegs[getVSReg()]));
+ }
+
+ void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VSFRegs[getVSReg()]));
+ }
+
+ void addRegVSSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VSSRegs[getVSReg()]));
+ }
+
+ void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(QFRegs[getReg()]));
+ }
+
+ void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(CRBITRegs[getCRBit()]));
+ }
+
+ void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(CRRegs[getCCReg()]));
+ }
+
+ void addCRBitMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(CRRegs[getCRBitMask()]));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Immediate)
+ Inst.addOperand(MCOperand::createImm(getImm()));
+ else
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
+ }
+
+ void addS16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::createImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::createImm(getImmS16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
+ break;
+ }
+ }
+
+ void addU16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::createImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::createImm(getImmU16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
+ break;
+ }
+ }
+
+ void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Immediate)
+ Inst.addOperand(MCOperand::createImm(getImm() / 4));
+ else
+ Inst.addOperand(MCOperand::createExpr(getExpr()));
+ }
+
+ void addTLSRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createExpr(getTLSReg()));
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ void print(raw_ostream &OS) const override;
+
+ static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
+ CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) {
+ // Allocate extra memory for the string and copy it.
+ // FIXME: This is incorrect, Operands are owned by unique_ptr with a default
+ // deleter which will destroy them by simply using "delete", not correctly
+ // calling operator delete on this extra memory after calling the dtor
+ // explicitly.
+ void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
+ std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
+ Op->Tok.Data = reinterpret_cast<const char *>(Op.get() + 1);
+ Op->Tok.Length = Str.size();
+ std::memcpy(const_cast<char *>(Op->Tok.Data), Str.data(), Str.size());
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
+ SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Expression);
+ Op->Expr.Val = Val;
+ Op->Expr.CRVal = EvaluateCRExpr(Val);
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
+ CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(TLSRegister);
+ Op->TLSReg.Sym = Sym;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
+ CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(ContextImmediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
+ CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
+ return CreateImm(CE->getValue(), S, E, IsPPC64);
+
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Val))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
+ return CreateTLSReg(SRE, S, E, IsPPC64);
+
+ if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
+ int64_t Res;
+ if (TE->evaluateAsConstant(Res))
+ return CreateContextImm(Res, S, E, IsPPC64);
+ }
+
+ return CreateExpr(Val, S, E, IsPPC64);
+ }
+};
+
+} // end anonymous namespace.
+
+void PPCOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Immediate:
+ case ContextImmediate:
+ OS << getImm();
+ break;
+ case Expression:
+ OS << *getExpr();
+ break;
+ case TLSRegister:
+ OS << *getTLSReg();
+ break;
+ }
+}
+
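+// Add the negation of operand Op to Inst. Constant immediates are negated
+// directly; an explicit unary minus or a subtraction is simplified rather
+// than being wrapped in another negation; anything else is emitted as -(expr).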
+static void
+addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
+ if (Op.isImm()) {
+ Inst.addOperand(MCOperand::createImm(-Op.getImm()));
+ return;
+ }
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) {
+ if (UnExpr->getOpcode() == MCUnaryExpr::Minus) {
+ Inst.addOperand(MCOperand::createExpr(UnExpr->getSubExpr()));
+ return;
+ }
+ } else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+ if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
+ const MCExpr *NE = MCBinaryExpr::createSub(BinExpr->getRHS(),
+ BinExpr->getLHS(), Ctx);
+ Inst.addOperand(MCOperand::createExpr(NE));
+ return;
+ }
+ }
+ Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::createMinus(Expr, Ctx)));
+}
+
+void PPCAsmParser::ProcessInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
+ int Opcode = Inst.getOpcode();
+ switch (Opcode) {
+ case PPC::DCBTx:
+ case PPC::DCBTT:
+ case PPC::DCBTSTx:
+ case PPC::DCBTSTT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode((Opcode == PPC::DCBTx || Opcode == PPC::DCBTT) ?
+ PPC::DCBT : PPC::DCBTST);
+ TmpInst.addOperand(MCOperand::createImm(
+ (Opcode == PPC::DCBTx || Opcode == PPC::DCBTSTx) ? 0 : 16));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::DCBTCT:
+ case PPC::DCBTDS: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::DCBT);
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::DCBTSTCT:
+ case PPC::DCBTSTDS: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::DCBTST);
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::LAx: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::LA);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBI: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::ADDI);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBIS: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::ADDIS);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBIC: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::ADDIC);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBICo: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::ADDICo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
+ Inst = TmpInst;
+ break;
+ }
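+  // The rotate-and-mask cases below expand PowerPC extended mnemonics
+  // (extlwi, extrwi, inslwi, insrwi, rotrwi, slwi, srwi, clrrwi, clrlslwi
+  // and their 64-bit counterparts) into the underlying rlwinm/rlwimi and
+  // rldicl/rldicr/rldimi/rldic instructions, computing the shift and mask
+  // fields from the parsed bit count N and starting bit B.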
+ case PPC::EXTLWI:
+ case PPC::EXTLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(N - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTRWI:
+ case PPC::EXTRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(B + N));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSLWI:
+ case PPC::INSLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSLWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(32 - B));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm((B + N) - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSRWI:
+ case PPC::INSRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSRWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(32 - (B + N)));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm((B + N) - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::ROTRWI:
+ case PPC::ROTRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SLWI:
+ case PPC::SLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SRWI:
+ case PPC::SRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(32 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRRWI:
+ case PPC::CLRRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRLSLWI:
+ case PPC::CLRLSLWIo: {
+ MCInst TmpInst;
+ int64_t B = Inst.getOperand(2).getImm();
+ int64_t N = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(B - N));
+ TmpInst.addOperand(MCOperand::createImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTLDI:
+ case PPC::EXTLDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ TmpInst.addOperand(MCOperand::createImm(N - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTRDI:
+ case PPC::EXTRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(B + N));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSRDI:
+ case PPC::INSRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSRDI? PPC::RLDIMI : PPC::RLDIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(64 - (B + N)));
+ TmpInst.addOperand(MCOperand::createImm(B));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::ROTRDI:
+ case PPC::ROTRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SLDI:
+ case PPC::SLDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(63 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SRDI:
+ case PPC::SRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(64 - N));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRRDI:
+ case PPC::CLRRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(0));
+ TmpInst.addOperand(MCOperand::createImm(63 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRLSLDI:
+ case PPC::CLRLSLDIo: {
+ MCInst TmpInst;
+ int64_t B = Inst.getOperand(2).getImm();
+ int64_t N = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(N));
+ TmpInst.addOperand(MCOperand::createImm(B - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWINMbm:
+ case PPC::RLWINMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWIMIbm:
+ case PPC::RLWIMIobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWNMbm:
+ case PPC::RLWNMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::createImm(MB));
+ TmpInst.addOperand(MCOperand::createImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::MFTB: {
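+    // On subtargets that implement mftb via mfspr (FeatureMFTB), rewrite the
+    // instruction as MFSPR, reusing the parsed operands; the time-base
+    // register number (268 for TB, 269 for TBU) doubles as the SPR number.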
+ if (getSTI().getFeatureBits()[PPC::FeatureMFTB]) {
+ assert(Inst.getNumOperands() == 2 && "Expecting two operands");
+ Inst.setOpcode(PPC::MFSPR);
+ }
+ break;
+ }
+ }
+}
+
+bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ case Match_Success:
+ // Post-process instructions (typically extended mnemonics)
+ ProcessInstruction(Inst, Operands);
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, getSTI());
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+}
+
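+// Map a register-name token to both its MCPhysReg and the integer value used
+// when the name stands in for a plain number (e.g. the SPR numbers 8 for
+// "lr", 9 for "ctr" and 256 for "vrsave", or the register index for rN, fN,
+// vN, vsN, qN and crN). Returns false on success, true if the token is not a
+// recognized register name.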
+bool PPCAsmParser::
+MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef Name = Tok.getString();
+
+ if (Name.equals_lower("lr")) {
+ RegNo = isPPC64()? PPC::LR8 : PPC::LR;
+ IntVal = 8;
+ return false;
+ } else if (Name.equals_lower("ctr")) {
+ RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
+ IntVal = 9;
+ return false;
+ } else if (Name.equals_lower("vrsave")) {
+ RegNo = PPC::VRSAVE;
+ IntVal = 256;
+ return false;
+ } else if (Name.startswith_lower("r") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("f") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = FRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("vs") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
+ RegNo = VSRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("v") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = VRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("q") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = QFRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("cr") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
+ RegNo = CRRegs[IntVal];
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool PPCAsmParser::
+ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ RegNo = 0;
+ int64_t IntVal;
+
+ if (!MatchRegisterName(Tok, RegNo, IntVal)) {
+ Parser.Lex(); // Eat identifier token.
+ return false;
+ }
+
+ return Error(StartLoc, "invalid register name");
+}
+
+/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan
+/// the expression and check for VK_PPC_LO/HI/HA
+/// symbol variants. If all symbols with modifier use the same
+/// variant, return the corresponding PPCMCExpr::VariantKind,
+/// and a modified expression using the default symbol variant.
+/// Otherwise, return NULL.
+const MCExpr *PPCAsmParser::
+ExtractModifierFromExpr(const MCExpr *E,
+ PPCMCExpr::VariantKind &Variant) {
+ MCContext &Context = getParser().getContext();
+ Variant = PPCMCExpr::VK_PPC_None;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return nullptr;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ switch (SRE->getKind()) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Variant = PPCMCExpr::VK_PPC_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Variant = PPCMCExpr::VK_PPC_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Variant = PPCMCExpr::VK_PPC_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ Variant = PPCMCExpr::VK_PPC_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ Variant = PPCMCExpr::VK_PPC_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ Variant = PPCMCExpr::VK_PPC_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ Variant = PPCMCExpr::VK_PPC_HIGHESTA;
+ break;
+ default:
+ return nullptr;
+ }
+
+ return MCSymbolRefExpr::create(&SRE->getSymbol(), Context);
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return nullptr;
+ return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ PPCMCExpr::VariantKind LHSVariant, RHSVariant;
+ const MCExpr *LHS = ExtractModifierFromExpr(BE->getLHS(), LHSVariant);
+ const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant);
+
+ if (!LHS && !RHS)
+ return nullptr;
+
+ if (!LHS) LHS = BE->getLHS();
+ if (!RHS) RHS = BE->getRHS();
+
+ if (LHSVariant == PPCMCExpr::VK_PPC_None)
+ Variant = RHSVariant;
+ else if (RHSVariant == PPCMCExpr::VK_PPC_None)
+ Variant = LHSVariant;
+ else if (LHSVariant == RHSVariant)
+ Variant = LHSVariant;
+ else
+ return nullptr;
+
+ return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// Find all VK_TLSGD/VK_TLSLD symbol references in expression and replace
+/// them by VK_PPC_TLSGD/VK_PPC_TLSLD. This is necessary to avoid having
+/// _GLOBAL_OFFSET_TABLE_ created via ELFObjectWriter::RelocNeedsGOT.
+/// FIXME: This is a hack.
+const MCExpr *PPCAsmParser::
+FixupVariantKind(const MCExpr *E) {
+ MCContext &Context = getParser().getContext();
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return E;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+
+ switch (SRE->getKind()) {
+ case MCSymbolRefExpr::VK_TLSGD:
+ Variant = MCSymbolRefExpr::VK_PPC_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Variant = MCSymbolRefExpr::VK_PPC_TLSLD;
+ break;
+ default:
+ return E;
+ }
+ return MCSymbolRefExpr::create(&SRE->getSymbol(), Variant, Context);
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = FixupVariantKind(UE->getSubExpr());
+ if (Sub == UE->getSubExpr())
+ return E;
+ return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = FixupVariantKind(BE->getLHS());
+ const MCExpr *RHS = FixupVariantKind(BE->getRHS());
+ if (LHS == BE->getLHS() && RHS == BE->getRHS())
+ return E;
+ return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// ParseExpression. This differs from the default "parseExpression" in that
+/// it handles modifiers.
+bool PPCAsmParser::
+ParseExpression(const MCExpr *&EVal) {
+
+ if (isDarwin())
+ return ParseDarwinExpression(EVal);
+
+ // (ELF Platforms)
+ // Handle \code @l/@ha \endcode
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ EVal = FixupVariantKind(EVal);
+
+ PPCMCExpr::VariantKind Variant;
+ const MCExpr *E = ExtractModifierFromExpr(EVal, Variant);
+ if (E)
+ EVal = PPCMCExpr::create(Variant, E, false, getParser().getContext());
+
+ return false;
+}
+
+/// ParseDarwinExpression. (MachO Platforms)
+/// This differs from the default "parseExpression" in that it handles detection
+/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present,
+/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO
+/// syntax form so it is done here. TODO: Determine if there is merit in arranging
+/// for this to be done at a higher level.
+bool PPCAsmParser::
+ParseDarwinExpression(const MCExpr *&EVal) {
+ MCAsmParser &Parser = getParser();
+ PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
+ switch (getLexer().getKind()) {
+ default:
+ break;
+ case AsmToken::Identifier:
+ // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus
+ // something starting with any other char should be part of the
+ // asm syntax. If handwritten asm includes an identifier like lo16,
+ // then all bets are off - but no-one would do that, right?
+ StringRef poss = Parser.getTok().getString();
+ if (poss.equals_lower("lo16")) {
+ Variant = PPCMCExpr::VK_PPC_LO;
+ } else if (poss.equals_lower("hi16")) {
+ Variant = PPCMCExpr::VK_PPC_HI;
+ } else if (poss.equals_lower("ha16")) {
+ Variant = PPCMCExpr::VK_PPC_HA;
+ }
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ Parser.Lex(); // Eat the xx16
+ if (getLexer().isNot(AsmToken::LParen))
+ return Error(Parser.getTok().getLoc(), "expected '('");
+ Parser.Lex(); // Eat the '('
+ }
+ break;
+ }
+
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+ Parser.Lex(); // Eat the ')'
+ EVal = PPCMCExpr::create(Variant, EVal, false, getParser().getContext());
+ }
+ return false;
+}
+
+/// ParseOperand
+/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
+/// rNN for MachO.
+bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ const MCExpr *EVal;
+
+ // Attempt to parse the next token as an immediate
+ switch (getLexer().getKind()) {
+ // Special handling for register names. These are interpreted
+ // as immediates corresponding to the register number.
+ case AsmToken::Percent:
+ Parser.Lex(); // Eat the '%'.
+ unsigned RegNo;
+ int64_t IntVal;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ return false;
+ }
+ return Error(S, "invalid register name");
+
+ case AsmToken::Identifier:
+ // Note that non-register-name identifiers from the compiler will begin
+ // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include
+ // identifiers like r31foo - so we fall through in the event that parsing
+ // a register name fails.
+ if (isDarwin()) {
+ unsigned RegNo;
+ int64_t IntVal;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ return false;
+ }
+ }
+ // Fall-through to process non-register-name identifiers as expression.
+ // All other expressions
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ case AsmToken::Dollar:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
+ if (!ParseExpression(EVal))
+ break;
+ /* fall through */
+ default:
+ return Error(S, "unknown operand");
+ }
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()));
+
+ // Check whether this is a TLS call expression
+ bool TLSCall = false;
+ if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(EVal))
+ TLSCall = Ref->getSymbol().getName() == "__tls_get_addr";
+
+ if (TLSCall && getLexer().is(AsmToken::LParen)) {
+ const MCExpr *TLSSym;
+
+ Parser.Lex(); // Eat the '('.
+ S = Parser.getTok().getLoc();
+ if (ParseExpression(TLSSym))
+ return Error(S, "invalid TLS call expression");
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "missing ')'");
+ E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()));
+ }
+
+ // Otherwise, check for D-form memory operands
+ if (!TLSCall && getLexer().is(AsmToken::LParen)) {
+ Parser.Lex(); // Eat the '('.
+ S = Parser.getTok().getLoc();
+
+ int64_t IntVal;
+ switch (getLexer().getKind()) {
+ case AsmToken::Percent:
+ Parser.Lex(); // Eat the '%'.
+ unsigned RegNo;
+ if (MatchRegisterName(Parser.getTok(), RegNo, IntVal))
+ return Error(S, "invalid register name");
+ Parser.Lex(); // Eat the identifier token.
+ break;
+
+ case AsmToken::Integer:
+ if (!isDarwin()) {
+ if (getParser().parseAbsoluteExpression(IntVal) ||
+ IntVal < 0 || IntVal > 31)
+ return Error(S, "invalid register number");
+ } else {
+ return Error(S, "unexpected integer value");
+ }
+ break;
+
+ case AsmToken::Identifier:
+ if (isDarwin()) {
+ unsigned RegNo;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ break;
+ }
+ }
+      // Fall through.
+
+ default:
+ return Error(S, "invalid memory operand");
+ }
+
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "missing ')'");
+ E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ }
+
+ return false;
+}
+
+/// Parse an instruction mnemonic followed by its operands.
+bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
+ // The first operand is the token for the instruction name.
+ // If the next character is a '+' or '-', we need to add it to the
+ // instruction name, to match what TableGen is doing.
+ std::string NewOpcode;
+ if (getLexer().is(AsmToken::Plus)) {
+ getLexer().Lex();
+ NewOpcode = Name;
+ NewOpcode += '+';
+ Name = NewOpcode;
+ }
+ if (getLexer().is(AsmToken::Minus)) {
+ getLexer().Lex();
+ NewOpcode = Name;
+ NewOpcode += '-';
+ Name = NewOpcode;
+ }
+ // If the instruction ends in a '.', we need to create a separate
+ // token for it, to match what TableGen is doing.
+ size_t Dot = Name.find('.');
+ StringRef Mnemonic = Name.slice(0, Dot);
+ if (!NewOpcode.empty()) // Underlying memory for Name is volatile.
+ Operands.push_back(
+ PPCOperand::CreateTokenWithStringCopy(Mnemonic, NameLoc, isPPC64()));
+ else
+ Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64()));
+ if (Dot != StringRef::npos) {
+ SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot);
+ StringRef DotStr = Name.slice(Dot, StringRef::npos);
+ if (!NewOpcode.empty()) // Underlying memory for Name is volatile.
+ Operands.push_back(
+ PPCOperand::CreateTokenWithStringCopy(DotStr, DotLoc, isPPC64()));
+ else
+ Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64()));
+ }
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (ParseOperand(Operands))
+ return true;
+ }
+
+ // We'll now deal with an unfortunate special case: the syntax for the dcbt
+ // and dcbtst instructions differs for server vs. embedded cores.
+ // The syntax for dcbt is:
+ // dcbt ra, rb, th [server]
+ // dcbt th, ra, rb [embedded]
+ // where th can be omitted when it is 0. dcbtst is the same. We take the
+ // server form to be the default, so swap the operands if we're parsing for
+ // an embedded core (they'll be swapped again upon printing).
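+ // E.g. the embedded form "dcbt 8, 1, 2" (th, ra, rb) is reordered here
+ // to the server order (r1, r2, 8) before matching.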
+ if (getSTI().getFeatureBits()[PPC::FeatureBookE] &&
+ Operands.size() == 4 &&
+ (Name == "dcbt" || Name == "dcbtst")) {
+ std::swap(Operands[1], Operands[3]);
+ std::swap(Operands[2], Operands[1]);
+ }
+
+ return false;
+}
+
+/// ParseDirective parses the PPC-specific directives
+bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (!isDarwin()) {
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ if (IDVal == ".llong")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ if (IDVal == ".tc")
+ return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+ if (IDVal == ".machine")
+ return ParseDirectiveMachine(DirectiveID.getLoc());
+ if (IDVal == ".abiversion")
+ return ParseDirectiveAbiVersion(DirectiveID.getLoc());
+ if (IDVal == ".localentry")
+ return ParseDirectiveLocalEntry(DirectiveID.getLoc());
+ } else {
+ if (IDVal == ".machine")
+ return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
+ }
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (getParser().parseExpression(Value))
+ return false;
+
+ if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else {
+ getStreamer().EmitValue(Value, Size, ExprLoc);
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveTC
+/// ::= .tc [ symbol (, expression)* ]
+bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ // Skip TC symbol, which is only used with XCOFF.
+ while (getLexer().isNot(AsmToken::EndOfStatement)
+ && getLexer().isNot(AsmToken::Comma))
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Parser.Lex();
+
+ // Align to word size.
+ getParser().getStreamer().EmitValueToAlignment(Size);
+
+ // Emit expressions.
+ return ParseDirectiveWord(Size, L);
+}
+
+/// ParseDirectiveMachine (ELF platforms)
+/// ::= .machine [ cpu | "push" | "pop" ]
+bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+
+ // FIXME: Right now, the parser always allows any available
+ // instruction, so the .machine directive is not useful.
+ // Implement ".machine any" (by doing nothing) for the benefit
+ // of existing assembler code. Likewise, we can then implement
+ // ".machine push" and ".machine pop" as no-op.
+ if (CPU != "any" && CPU != "push" && CPU != "pop") {
+ Error(L, "unrecognized machine type");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitMachine(CPU);
+
+ return false;
+}
+
+/// ParseDarwinDirectiveMachine (Mach-O platforms)
+/// ::= .machine cpu-identifier
+bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+
+ // FIXME: this is only the 'default' set of cpu variants.
+ // However we don't act on this information at present, this is simply
+ // allowing parsing to proceed with minimal sanity checking.
+ if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") {
+ Error(L, "unrecognized cpu type");
+ return false;
+ }
+
+ if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) {
+ Error(L, "wrong cpu type specified for 64bit");
+ return false;
+ }
+ if (!isPPC64() && CPU == "ppc64") {
+ Error(L, "wrong cpu type specified for 32bit");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveAbiVersion
+/// ::= .abiversion constant-expression
+bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
+ int64_t AbiVersion;
+ if (getParser().parseAbsoluteExpression(AbiVersion)){
+ Error(L, "expected constant expression");
+ return false;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitAbiVersion(AbiVersion);
+
+ return false;
+}
+
+/// ParseDirectiveLocalEntry
+/// ::= .localentry symbol, expression
+bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name)) {
+ Error(L, "expected identifier in directive");
+ return false;
+ }
+ MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name));
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(L, "expected expression");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitLocalEntry(Sym, Expr);
+
+ return false;
+}
+
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmParser() {
+ RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
+ RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target);
+ RegisterMCAsmParser<PPCAsmParser> C(ThePPC64LETarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "PPCGenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
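+ // E.g. an alias whose asm string hard-codes a literal "0" operand only
+ // matches when the parsed operand is the immediate 0.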
+ int64_t ImmVal;
+ switch (Kind) {
+ case MCK_0: ImmVal = 0; break;
+ case MCK_1: ImmVal = 1; break;
+ case MCK_2: ImmVal = 2; break;
+ case MCK_3: ImmVal = 3; break;
+ case MCK_4: ImmVal = 4; break;
+ case MCK_5: ImmVal = 5; break;
+ case MCK_6: ImmVal = 6; break;
+ case MCK_7: ImmVal = 7; break;
+ default: return Match_InvalidOperand;
+ }
+
+ PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
+ if (Op.isImm() && Op.getImm() == ImmVal)
+ return Match_Success;
+
+ return Match_InvalidOperand;
+}
+
+const MCExpr *
+PPCAsmParser::applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant,
+ MCContext &Ctx) {
+ switch (Variant) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HI:
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HA:
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ default:
+ return nullptr;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
new file mode 100644
index 0000000..1fc84fb
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -0,0 +1,408 @@
+//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class PPCDisassembler : public MCDisassembler {
+ bool IsLittleEndian;
+
+public:
+ PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ bool IsLittleEndian)
+ : MCDisassembler(STI, Ctx), IsLittleEndian(IsLittleEndian) {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createPPCDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/false);
+}
+
+static MCDisassembler *createPPCLEDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/true);
+}
+
+extern "C" void LLVMInitializePowerPCDisassembler() {
+ // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(ThePPC32Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget,
+ createPPCLEDisassembler);
+}
+
+// FIXME: These can be generated by TableGen from the existing register
+// encoding values!
+
+static const unsigned CRRegs[] = {
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+static const unsigned CRBITRegs[] = {
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+
+static const unsigned FRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+
+static const unsigned VRegs[] = {
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+static const unsigned VSRegs[] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+
+static const unsigned VSFRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
+static const unsigned VSSRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
+static const unsigned GPRegs[] = {
+ PPC::R0, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned GP0Regs[] = {
+ PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned G8Regs[] = {
+ PPC::X0, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+
+static const unsigned QFRegs[] = {
+ PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+ PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+ PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+ PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+ PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+ PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
+
+template <std::size_t N>
+static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
+ const unsigned (&Regs)[N]) {
+ assert(RegNo < N && "Invalid register number");
+ Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
+static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
+static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRBITRegs);
+}
+
+static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VRegs);
+}
+
+static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSRegs);
+}
+
+static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSFRegs);
+}
+
+static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSSRegs);
+}
+
+static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GPRegs);
+}
+
+static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GP0Regs);
+}
+
+static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, G8Regs);
+}
+
+#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
+#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+
+static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, QFRegs);
+}
+
+#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
+#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
+
+template<unsigned N>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
+template<unsigned N>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memri field (imm, reg), which has the low 16 bits as the
+ // displacement and the next 5 bits as the register #.
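+ // E.g. Imm = (5 << 16) | 0xFFF8 decodes to base register r5 with a
+ // sign-extended displacement of -8.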
+
+ uint64_t Base = Imm >> 16;
+ uint64_t Disp = Imm & 0xFFFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ switch (Inst.getOpcode()) {
+ default: break;
+ case PPC::LBZU:
+ case PPC::LHAU:
+ case PPC::LHZU:
+ case PPC::LWZU:
+ case PPC::LFSU:
+ case PPC::LFDU:
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ break;
+ case PPC::STBU:
+ case PPC::STHU:
+ case PPC::STWU:
+ case PPC::STFSU:
+ case PPC::STFDU:
+ Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memrix field (imm, reg), which has the low 14 bits as the
+ // displacement and the next 5 bits as the register #.
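+ // E.g. Imm = (3 << 14) | 0x3FFE decodes to base register r3 with a byte
+ // displacement of (0x3FFE << 2) sign-extended, i.e. -8.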
+
+ uint64_t Base = Imm >> 14;
+ uint64_t Disp = Imm & 0x3FFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ if (Inst.getOpcode() == PPC::LDU)
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ else if (Inst.getOpcode() == PPC::STDU)
+ Inst.insert(Inst.begin(), MCOperand::createReg(GP0Regs[Base]));
+
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
+ Inst.addOperand(MCOperand::createReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // The cr bit encoding is 0x80 >> cr_reg_num.
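+ // E.g. Imm = 0x20 has five trailing zeros and so selects CRRegs[7 - 5],
+ // i.e. CR2.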
+
+ unsigned Zeros = countTrailingZeros(Imm);
+ assert(Zeros < 8 && "Invalid CR bit value");
+
+ Inst.addOperand(MCOperand::createReg(CRRegs[7 - Zeros]));
+ return MCDisassembler::Success;
+}
+
+#include "PPCGenDisassemblerTables.inc"
+
+DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS) const {
+ // Get the four bytes of the instruction.
+ Size = 4;
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Read the instruction in the proper endianness.
+ uint32_t Inst = IsLittleEndian ? support::endian::read32le(Bytes.data())
+ : support::endian::read32be(Bytes.data());
+
+ if (STI.getFeatureBits()[PPC::FeatureQPX]) {
+ DecodeStatus result =
+ decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
+ if (result != MCDisassembler::Fail)
+ return result;
+ }
+
+ return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 0000000..5e1d227
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,450 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstPrinter.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// FIXME: Once the integrated assembler supports full register names, tie this
+// to the verbose-asm setting.
+static cl::opt<bool>
+FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
+ cl::desc("Use full register names when printing assembly"));
+
+#define PRINT_ALIAS_INSTR
+#include "PPCGenAsmWriter.inc"
+
+void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ const char *RegName = getRegisterName(RegNo);
+ if (RegName[0] == 'q' /* QPX */) {
+ // The system toolchain on the BG/Q does not understand QPX register names
+ // in .cfi_* directives, so print the name of the floating-point
+ // subregister instead.
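+ // E.g. "q5" is printed as "f5" in those directives.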
+ std::string RN(RegName);
+
+ RN[0] = 'f';
+ OS << RN;
+
+ return;
+ }
+
+ OS << RegName;
+}
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ // Check for slwi/srwi mnemonics.
+ if (MI->getOpcode() == PPC::RLWINM) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ bool useSubstituteMnemonic = false;
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; useSubstituteMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; useSubstituteMnemonic = true;
+ SH = 32-SH;
+ }
+ if (useSubstituteMnemonic) {
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ // dcbt[st] is printed manually here because:
+ // 1. The assembly syntax is different between embedded and server targets
+ // 2. We must print the short mnemonics for TH == 0 because the
+ // embedded/server syntax default will not be stable across assemblers
+ // The syntax for dcbt is:
+ // dcbt ra, rb, th [server]
+ // dcbt th, ra, rb [embedded]
+ // where th can be omitted when it is 0. dcbtst is the same.
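+ // E.g. with TH == 0 both variants print just "dcbt ra, rb"; a nonzero,
+ // non-16 hint prints first on embedded targets and last on server ones.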
+ if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) {
+ unsigned char TH = MI->getOperand(0).getImm();
+ O << "\tdcbt";
+ if (MI->getOpcode() == PPC::DCBTST)
+ O << "st";
+ if (TH == 16)
+ O << "t";
+ O << " ";
+
+ bool IsBookE = STI.getFeatureBits()[PPC::FeatureBookE];
+ if (IsBookE && TH != 0 && TH != 16)
+ O << (unsigned int) TH << ", ";
+
+ printOperand(MI, 1, O);
+ O << ", ";
+ printOperand(MI, 2, O);
+
+ if (!IsBookE && TH != 0 && TH != 16)
+ O << ", " << (unsigned int) TH;
+
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is
+ // used when converting a 32-bit float to a 64-bit float as part of
+ // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()),
+ // as otherwise we have problems with incorrect register classes
+ // in machine instruction verification. For now, just avoid trying
+ // to print it as such an instruction has no effect (a 32-bit float
+ // in a register is already in 64-bit form, just with lower
+ // precision). FIXME: Is there a better solution?
+ if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ unsigned Code = MI->getOperand(OpNo).getImm();
+
+ if (StringRef(Modifier) == "cc") {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LT_PLUS:
+ case PPC::PRED_LT:
+ O << "lt";
+ return;
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_LE_PLUS:
+ case PPC::PRED_LE:
+ O << "le";
+ return;
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_EQ_PLUS:
+ case PPC::PRED_EQ:
+ O << "eq";
+ return;
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GE_PLUS:
+ case PPC::PRED_GE:
+ O << "ge";
+ return;
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_GT_PLUS:
+ case PPC::PRED_GT:
+ O << "gt";
+ return;
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_NE_PLUS:
+ case PPC::PRED_NE:
+ O << "ne";
+ return;
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_UN_PLUS:
+ case PPC::PRED_UN:
+ O << "un";
+ return;
+ case PPC::PRED_NU_MINUS:
+ case PPC::PRED_NU_PLUS:
+ case PPC::PRED_NU:
+ O << "nu";
+ return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
+ }
+ llvm_unreachable("Invalid predicate code");
+ }
+
+ if (StringRef(Modifier) == "pm") {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_LT:
+ case PPC::PRED_LE:
+ case PPC::PRED_EQ:
+ case PPC::PRED_GE:
+ case PPC::PRED_GT:
+ case PPC::PRED_NE:
+ case PPC::PRED_UN:
+ case PPC::PRED_NU:
+ return;
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_NU_MINUS:
+ O << "-";
+ return;
+ case PPC::PRED_LT_PLUS:
+ case PPC::PRED_LE_PLUS:
+ case PPC::PRED_EQ_PLUS:
+ case PPC::PRED_GE_PLUS:
+ case PPC::PRED_GT_PLUS:
+ case PPC::PRED_NE_PLUS:
+ case PPC::PRED_UN_PLUS:
+ case PPC::PRED_NU_PLUS:
+ O << "+";
+ return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
+ }
+ llvm_unreachable("Invalid predicate code");
+ }
+
+ assert(StringRef(Modifier) == "reg" &&
+ "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!");
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1 && "Invalid u1imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 3 && "Invalid u2imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 8 && "Invalid u3imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 15 && "Invalid u4imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int Value = MI->getOperand(OpNo).getImm();
+ Value = SignExtend32<5>(Value);
+ O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned short Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1023 && "Invalid u10imm argument!");
+ O << (unsigned short)Value;
+}
+
+void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned short Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 4095 && "Invalid u12imm argument!");
+ O << (unsigned short)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)MI->getOperand(OpNo).getImm();
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print .+8, an eight-byte displacement from the PC.
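+ // E.g. an encoded immediate of 2 represents a two-instruction (8-byte)
+ // displacement and prints as ".+8".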
+ O << ".+";
+ printAbsBranchOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
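+ // Print the CR field as a one-hot mask bit, 0x80 >> N for crN
+ // (e.g. CR2 prints as 32).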
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: llvm_unreachable("Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+ O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printS16ImmOperand(MI, OpNo, O);
+ O << '(';
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the Darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
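+ // E.g. Darwin syntax prints "lwzx r3, 0, r4" rather than "lwzx r3, r0, r4".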
+ if (MI->getOperand(OpNo).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+ // come at the _end_ of the expression.
+ const MCOperand &Op = MI->getOperand(OpNo);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
+ O << refExp.getSymbol().getName();
+ O << '(';
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+ if (refExp.getKind() != MCSymbolRefExpr::VK_None)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
+}
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used for Linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ if (FullRegNames)
+ return RegName;
+
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'q': // for QPX
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ const char *RegName = getRegisterName(Op.getReg());
+ // The Linux and AIX assemblers do not accept register prefixes.
+ if (!isDarwinSyntax())
+ RegName = stripRegisterPrefix(RegName);
+
+ O << RegName;
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ Op.getExpr()->print(O, &MAI);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 0000000..53eb727
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,71 @@
+//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class PPCInstPrinter : public MCInstPrinter {
+ bool IsDarwin;
+public:
+ PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, bool isDarwin)
+ : MCInstPrinter(MAI, MII, MRI), IsDarwin(isDarwin) {}
+
+ bool isDarwinSyntax() const {
+ return IsDarwin;
+ }
+
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx,
+ raw_ostream &OS);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier = nullptr);
+
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
new file mode 100644
index 0000000..b6dd595
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -0,0 +1,240 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ case PPC::fixup_ppc_nofixup:
+ return Value;
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
+ return Value & 0xfffc;
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ return Value & 0x3fffffc;
+ case PPC::fixup_ppc_half16:
+ return Value & 0xffff;
+ case PPC::fixup_ppc_half16ds:
+ return Value & 0xfffc;
+ }
+}
+
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_half16ds:
+ return 2;
+ case FK_Data_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ return 4;
+ case FK_Data_8:
+ return 8;
+ case PPC::fixup_ppc_nofixup:
+ return 0;
+ }
+}
+
+namespace {
+
+class PPCAsmBackend : public MCAsmBackend {
+ const Target &TheTarget;
+ bool IsLittleEndian;
+public:
+ PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T),
+ IsLittleEndian(isLittle) {}
+
+ unsigned getNumFixupKinds() const override {
+ return PPC::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24abs", 6, 24, 0 },
+ { "fixup_ppc_brcond14abs", 16, 14, 0 },
+ { "fixup_ppc_half16", 0, 16, 0 },
+ { "fixup_ppc_half16ds", 0, 14, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
+ };
+ const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24abs", 2, 24, 0 },
+ { "fixup_ppc_brcond14abs", 2, 14, 0 },
+ { "fixup_ppc_half16", 0, 16, 0 },
+ { "fixup_ppc_half16ds", 2, 14, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i);
+ Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
+ }
+ }
+
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override {
+ switch ((PPC::Fixups)Fixup.getKind()) {
+ default: break;
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ // If the target symbol has a local entry point we must not attempt
+ // to resolve the fixup directly. Emit a relocation and leave
+ // resolution of the final target address to the linker.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (const auto *S = dyn_cast<MCSymbolELF>(&A->getSymbol())) {
+ // The "other" values are stored in the last 6 bits of the second
+ // byte. The traditional defines for STO values assume the full byte
+ // and thus the shift to pack it.
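+ // E.g. a symbol given a nonzero offset via ".localentry" has bits set
+ // under STO_PPC64_LOCAL_MASK, so the fixup is left for the linker.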
+ unsigned Other = S->getOther() << 2;
+ if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
+ IsResolved = false;
+ }
+ }
+ break;
+ }
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override {
+ // FIXME.
+ return false;
+ }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
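+ // Fill with the canonical PPC nop, "ori 0, 0, 0" (0x60000000), and zero
+ // any trailing bytes that don't form a full word.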
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->write32(0x60000000);
+
+ OW->WriteZeros(Count % 4);
+
+ return true;
+ }
+
+ unsigned getPointerSize() const {
+ StringRef Name = TheTarget.getName();
+ if (Name == "ppc64" || Name == "ppc64le") return 8;
+ assert(Name == "ppc32" && "Unknown target name!");
+ return 4;
+ }
+
+ bool isLittleEndian() const {
+ return IsLittleEndian;
+ }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+ class DarwinPPCAsmBackend : public PPCAsmBackend {
+ public:
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ bool is64 = getPointerSize() == 8;
+ return createPPCMachObjectWriter(
+ OS,
+ /*Is64Bit=*/is64,
+ (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+ MachO::CPU_SUBTYPE_POWERPC_ALL);
+ }
+ };
+
+ class ELFPPCAsmBackend : public PPCAsmBackend {
+ uint8_t OSABI;
+ public:
+ ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
+ PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ bool is64 = getPointerSize() == 8;
+ return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
+ }
+ };
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU) {
+ if (TT.isOSDarwin())
+ return new DarwinPPCAsmBackend(T);
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
+ return new ELFPPCAsmBackend(T, IsLittleEndian, OSABI);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
new file mode 100644
index 0000000..dd99495
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -0,0 +1,425 @@
+//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
+
+ protected:
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const override;
+ };
+}
+
+PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI,
+ Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*HasRelocationAddend*/ true) {}
+
+static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
+ const MCFixup &Fixup) {
+ const MCExpr *Expr = Fixup.getValue();
+
+ if (Expr->getKind() != MCExpr::Target)
+ return Target.getAccessVariant();
+
+ switch (cast<PPCMCExpr>(Expr)->getKind()) {
+ case PPCMCExpr::VK_PPC_None:
+ return MCSymbolRefExpr::VK_None;
+ case PPCMCExpr::VK_PPC_LO:
+ return MCSymbolRefExpr::VK_PPC_LO;
+ case PPCMCExpr::VK_PPC_HI:
+ return MCSymbolRefExpr::VK_PPC_HI;
+ case PPCMCExpr::VK_PPC_HA:
+ return MCSymbolRefExpr::VK_PPC_HA;
+ case PPCMCExpr::VK_PPC_HIGHERA:
+ return MCSymbolRefExpr::VK_PPC_HIGHERA;
+ case PPCMCExpr::VK_PPC_HIGHER:
+ return MCSymbolRefExpr::VK_PPC_HIGHER;
+ case PPCMCExpr::VK_PPC_HIGHEST:
+ return MCSymbolRefExpr::VK_PPC_HIGHEST;
+ case PPCMCExpr::VK_PPC_HIGHESTA:
+ return MCSymbolRefExpr::VK_PPC_HIGHESTA;
+ }
+ llvm_unreachable("unknown PPCMCExpr kind");
+}
+
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
+
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_PPC_PLTREL24;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LOCAL:
+ Type = ELF::R_PPC_LOCAL24PC;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
+ Type = ELF::R_PPC_REL14;
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC_REL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = ELF::R_PPC_REL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = ELF::R_PPC_REL16_HA;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_half16ds:
+ Target.print(errs());
+ errs() << '\n';
+ report_fatal_error("Invalid PC-relative half16ds relocation");
+ case FK_Data_4:
+ case FK_PCRel_4:
+ Type = ELF::R_PPC_REL32;
+ break;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ Type = ELF::R_PPC64_REL64;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case PPC::fixup_ppc_br24abs:
+ Type = ELF::R_PPC_ADDR24;
+ break;
+ case PPC::fixup_ppc_brcond14abs:
+ Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = ELF::R_PPC_ADDR16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = ELF::R_PPC_ADDR16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ Type = ELF::R_PPC64_ADDR16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ Type = ELF::R_PPC64_ADDR16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ Type = ELF::R_PPC64_ADDR16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ Type = ELF::R_PPC64_ADDR16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_PPC_GOT16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_LO:
+ Type = ELF::R_PPC_GOT16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_HI:
+ Type = ELF::R_PPC_GOT16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_HA:
+ Type = ELF::R_PPC_GOT16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_HI:
+ Type = ELF::R_PPC64_TOC16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_HA:
+ Type = ELF::R_PPC64_TOC16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC_TPREL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_LO:
+ Type = ELF::R_PPC_TPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HI:
+ Type = ELF::R_PPC_TPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HA:
+ Type = ELF::R_PPC_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHER:
+ Type = ELF::R_PPC64_TPREL16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHERA:
+ Type = ELF::R_PPC64_TPREL16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHEST:
+ Type = ELF::R_PPC64_TPREL16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHESTA:
+ Type = ELF::R_PPC64_TPREL16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HI:
+ Type = ELF::R_PPC64_DTPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHER:
+ Type = ELF::R_PPC64_DTPREL16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHERA:
+ Type = ELF::R_PPC64_DTPREL16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHEST:
+ Type = ELF::R_PPC64_DTPREL16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHESTA:
+ Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSGD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSGD16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
+ Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSLD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSLD16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL:
+ /* We don't have R_PPC64_GOT_TPREL16, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_TPREL16_DS. */
+ Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO:
+ /* We don't have R_PPC64_GOT_TPREL16_LO, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_TPREL16_LO_DS. */
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI:
+ Type = ELF::R_PPC64_GOT_TPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL:
+ /* We don't have R_PPC64_GOT_DTPREL16, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_DS. */
+ Type = ELF::R_PPC64_GOT_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO:
+ /* We don't have R_PPC64_GOT_DTPREL16_LO, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_LO_DS. */
+ Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI:
+ Type = ELF::R_PPC64_GOT_DTPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA:
+ Type = ELF::R_PPC64_GOT_DTPREL16_HA;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_half16ds:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC64_ADDR16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_PPC64_GOT16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_LO:
+ Type = ELF::R_PPC64_GOT16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC:
+ Type = ELF::R_PPC64_TOC16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_LO:
+ Type = ELF::R_PPC64_TOC16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC64_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_LO:
+ Type = ELF::R_PPC64_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL:
+ Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO:
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL:
+ Type = ELF::R_PPC64_GOT_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO:
+ Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_nofixup:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSGD;
+ else
+ Type = ELF::R_PPC_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSLD;
+ else
+ Type = ELF::R_PPC_TLSLD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLS:
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLS;
+ else
+ Type = ELF::R_PPC_TLS;
+ break;
+ }
+ break;
+ case FK_Data_8:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TOCBASE:
+ Type = ELF::R_PPC64_TOC;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPMOD:
+ Type = ELF::R_PPC64_DTPMOD64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC64_TPREL64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL64;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_PPC_ADDR32;
+ break;
+ case FK_Data_2:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ }
+ }
+ return Type;
+}
+
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const {
+ switch (Type) {
+ default:
+ return false;
+
+ case ELF::R_PPC_REL24:
+ // If the target symbol has a local entry point, we must keep the
+ // target symbol to preserve that information for the linker.
+ // The "other" values are stored in the last 6 bits of the second byte.
+    // The traditional defines for STO values assume the full byte, hence the
+    // shift needed to restore the full-byte value before masking.
+ unsigned Other = cast<MCSymbolELF>(Sym).getOther() << 2;
+ return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
+ }
+}
+
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit,
+ bool IsLittleEndian,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
new file mode 100644
index 0000000..ae43e59d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -0,0 +1,56 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+#undef PPC
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+  /// fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like
+  /// 'b' and 'bl'.
+ fixup_ppc_br24 = FirstTargetFixupKind,
+
+ /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14,
+
+ /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches
+ /// like 'ba' and 'bla'.
+ fixup_ppc_br24abs,
+
+ /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14abs,
+
+ /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo)
+ /// or ha16(_foo) for instrs like 'li' or 'addis'.
+ fixup_ppc_half16,
+
+ /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// implied 2 zero bits for instrs like 'std'.
+ fixup_ppc_half16ds,
+
+ /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+ /// to __tls_get_addr for the TLS general and local dynamic models,
+ /// or inserts the thread-pointer register number.
+ fixup_ppc_nofixup,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
new file mode 100644
index 0000000..d8fab5b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -0,0 +1,83 @@
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the PPC MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+void PPCMCAsmInfoDarwin::anchor() { }
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = false;
+
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+  SupportsDebugInformation = true; // Debug information.
+
+ // The installed assembler for OSX < 10.6 lacks some directives.
+ // FIXME: this should really be a check on the assembler characteristics
+ // rather than OS version
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+ HasWeakDefCanBeHiddenDirective = false;
+
+ UseIntegratedAssembler = true;
+}
+
+void PPCELFMCAsmInfo::anchor() { }
+
+PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
+ // FIXME: This is not always needed. For example, it is not needed in the
+ // v2 abi.
+ NeedsLocalForSize = true;
+
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = T.getArch() == Triple::ppc64le;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "#";
+
+ // Uses '.section' before '.bss' directive
+ UsesELFSectionDirectiveForBSS = true;
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ DollarIsPC = true;
+
+ // Set up DWARF directives
+ MinInstAlignment = 4;
+
+  // Exception handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
+ AssemblerDialect = 1; // New-Style mnemonics.
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
+
+ UseIntegratedAssembler = true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
new file mode 100644
index 0000000..e252ac9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -0,0 +1,39 @@
+//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PPCMCAsmInfoDarwin and
+// PPCELFMCAsmInfo classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &);
+};
+
+class PPCELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
new file mode 100644
index 0000000..b729156
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -0,0 +1,360 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+
+ const MCInstrInfo &MCII;
+ const MCContext &CTX;
+ bool IsLittleEndian;
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+
+ ~PPCMCCodeEmitter() override {}
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override {
+ // For fast-isel, a float COPY_TO_REGCLASS can survive this long.
+ // It's just a nop to keep the register classes happy, so don't
+ // generate anything.
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+
+ // Output the constant in big/little endian byte order.
+ unsigned Size = Desc.getSize();
+ switch (Size) {
+ case 4:
+ if (IsLittleEndian) {
+ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits);
+ } else {
+ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits);
+ }
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ if (IsLittleEndian) {
+ uint64_t Swapped = (Bits << 32) | (Bits >> 32);
+ support::endian::Writer<support::little>(OS).write<uint64_t>(Swapped);
+ } else {
+ support::endian::Writer<support::big>(OS).write<uint64_t>(Bits);
+ }
+ break;
+ default:
+ llvm_unreachable ("Invalid instruction size");
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+ }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new PPCMCCodeEmitter(MCII, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::
+getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24abs));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::
+getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14abs));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the immediate field.
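+  // The 16-bit immediate sits in the low halfword of the instruction, so the
+  // fixup is applied at byte offset 2 on big-endian targets and at offset 0
+  // on little-endian targets.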
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
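+  // DS-form displacements must be a multiple of 4; only imm >> 2 is stored in
+  // the 14-bit field, so e.g. a displacement of 8 is encoded as 2.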
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16ds));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe8dis, which has the low 5-bits of (imm / 8)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 3;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe4dis, which has the low 5-bits of (imm / 4)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 2;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe2dis, which has the low 5-bits of (imm / 2)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 1;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the TLS register, which simply provides a relocation
+ // hint to the linker that this statement is part of a relocation sequence.
+ // Return the thread-pointer register's encoding.
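+  // (On ELF the thread pointer lives in r13 for 64-bit and r2 for 32-bit.)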
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ const Triple &TT = STI.getTargetTriple();
+ bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+ return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
+}
+
+unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // For special TLS calls, we need two fixups; one for the branch target
+ // (__tls_get_addr), which we create via getDirectBrEncoding as usual,
+ // and one for the TLSGD or TLSLD symbol, which is emitted here.
+ const MCOperand &MO = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ return getDirectBrEncoding(MI, OpNo, Fixups, STI);
+}
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg()) {
+ // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
new file mode 100644
index 0000000..6b97d4c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -0,0 +1,150 @@
+//===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFixupKinds.h"
+#include "PPCMCExpr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppcmcexpr"
+
+const PPCMCExpr*
+PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin);
+}
+
+void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ if (isDarwinSyntax()) {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO: OS << "lo16"; break;
+ case VK_PPC_HI: OS << "hi16"; break;
+ case VK_PPC_HA: OS << "ha16"; break;
+ }
+
+ OS << '(';
+ getSubExpr()->print(OS, MAI);
+ OS << ')';
+ } else {
+ getSubExpr()->print(OS, MAI);
+
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO: OS << "@l"; break;
+ case VK_PPC_HI: OS << "@h"; break;
+ case VK_PPC_HA: OS << "@ha"; break;
+ case VK_PPC_HIGHER: OS << "@higher"; break;
+ case VK_PPC_HIGHERA: OS << "@highera"; break;
+ case VK_PPC_HIGHEST: OS << "@highest"; break;
+ case VK_PPC_HIGHESTA: OS << "@highesta"; break;
+ }
+ }
+}
+
+bool
+PPCMCExpr::evaluateAsConstant(int64_t &Res) const {
+ MCValue Value;
+
+ if (!getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr))
+ return false;
+
+ if (!Value.isAbsolute())
+ return false;
+
+ Res = evaluateAsInt64(Value.getConstant());
+ return true;
+}
+
+int64_t
+PPCMCExpr::evaluateAsInt64(int64_t Value) const {
+ switch (Kind) {
+ case VK_PPC_LO:
+ return Value & 0xffff;
+ case VK_PPC_HI:
+ return (Value >> 16) & 0xffff;
+ case VK_PPC_HA:
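+    // Adding 0x8000 rounds the high halfword up when bit 15 of the value is
+    // set, compensating for the sign extension of @l when it is later added;
+    // e.g. for 0x12348678, @l is -0x7988 and @ha is 0x1235.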
+ return ((Value + 0x8000) >> 16) & 0xffff;
+ case VK_PPC_HIGHER:
+ return (Value >> 32) & 0xffff;
+ case VK_PPC_HIGHERA:
+ return ((Value + 0x8000) >> 32) & 0xffff;
+ case VK_PPC_HIGHEST:
+ return (Value >> 48) & 0xffff;
+ case VK_PPC_HIGHESTA:
+ return ((Value + 0x8000) >> 48) & 0xffff;
+ case VK_PPC_None:
+ break;
+ }
+ llvm_unreachable("Invalid kind!");
+}
+
+bool
+PPCMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ MCValue Value;
+
+ if (!getSubExpr()->evaluateAsRelocatable(Value, Layout, Fixup))
+ return false;
+
+ if (Value.isAbsolute()) {
+ int64_t Result = evaluateAsInt64(Value.getConstant());
+ if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) &&
+ (Result >= 0x8000))
+ return false;
+ Res = MCValue::get(Result);
+ } else {
+ if (!Layout)
+ return false;
+
+ MCContext &Context = Layout->getAssembler().getContext();
+ const MCSymbolRefExpr *Sym = Value.getSymA();
+ MCSymbolRefExpr::VariantKind Modifier = Sym->getKind();
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ return false;
+ switch (Kind) {
+ default:
+ llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO:
+ Modifier = MCSymbolRefExpr::VK_PPC_LO;
+ break;
+ case VK_PPC_HI:
+ Modifier = MCSymbolRefExpr::VK_PPC_HI;
+ break;
+ case VK_PPC_HA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HA;
+ break;
+ case VK_PPC_HIGHERA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHERA;
+ break;
+ case VK_PPC_HIGHER:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHER;
+ break;
+ case VK_PPC_HIGHEST:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHEST;
+ break;
+ case VK_PPC_HIGHESTA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHESTA;
+ break;
+ }
+ Sym = MCSymbolRefExpr::create(&Sym->getSymbol(), Modifier, Context);
+ Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant());
+ }
+
+ return true;
+}
+
+void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
new file mode 100644
index 0000000..d42a111
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -0,0 +1,100 @@
+//===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
+
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+namespace llvm {
+
+class PPCMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_PPC_None,
+ VK_PPC_LO,
+ VK_PPC_HI,
+ VK_PPC_HA,
+ VK_PPC_HIGHER,
+ VK_PPC_HIGHERA,
+ VK_PPC_HIGHEST,
+ VK_PPC_HIGHESTA
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+ bool IsDarwin;
+
+ int64_t evaluateAsInt64(int64_t Value) const;
+
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
+ : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx);
+
+ static const PPCMCExpr *createLo(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return create(VK_PPC_LO, Expr, isDarwin, Ctx);
+ }
+
+ static const PPCMCExpr *createHi(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return create(VK_PPC_HI, Expr, isDarwin, Ctx);
+ }
+
+ static const PPCMCExpr *createHa(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return create(VK_PPC_HA, Expr, isDarwin, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+  /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// isDarwinSyntax - True if expression is to be printed using Darwin syntax.
+ bool isDarwinSyntax() const { return IsDarwin; }
+
+
+ /// @}
+
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
+ }
+
+ // There are no TLS PPCMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+
+ bool evaluateAsConstant(int64_t &Res) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 0000000..30f232a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,275 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "PPCMCAsmInfo.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+// Pin the vtable to this file.
+PPCTargetStreamer::~PPCTargetStreamer() {}
+PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPPCMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) {
+ bool isPPC64 =
+ (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
+ unsigned Flavour = isPPC64 ? 0 : 1;
+ unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
+
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitPPCMCRegisterInfo(X, RA, Flavour, Flavour);
+ return X;
+}
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
+ return createPPCMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TheTriple) {
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin())
+ MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
+ else
+ MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
+
+ // Initial state of the frame pointer is R1.
+ unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
+ MAI->addInitialFrameState(Inst);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createPPCMCCodeGenInfo(const Triple &TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ if (RM == Reloc::Default) {
+ if (TT.isOSDarwin())
+ RM = Reloc::DynamicNoPIC;
+ else
+ RM = Reloc::Static;
+ }
+ if (CM == CodeModel::Default) {
+ if (!TT.isOSDarwin() &&
+ (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
+ CM = CodeModel::Medium;
+ }
+ X->initMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+namespace {
+class PPCTargetAsmStreamer : public PPCTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+ : PPCTargetStreamer(S), OS(OS) {}
+ void emitTCEntry(const MCSymbol &S) override {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ OS << '\n';
+ }
+ void emitMachine(StringRef CPU) override {
+ OS << "\t.machine " << CPU << '\n';
+ }
+ void emitAbiVersion(int AbiVersion) override {
+ OS << "\t.abiversion " << AbiVersion << '\n';
+ }
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+ const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+
+ OS << "\t.localentry\t";
+ S->print(OS, MAI);
+ OS << ", ";
+ LocalOffset->print(OS, MAI);
+ OS << '\n';
+ }
+};
+
+class PPCTargetELFStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
+ void emitTCEntry(const MCSymbol &S) override {
+ // Creates a R_PPC64_TOC relocation
+ Streamer.EmitValueToAlignment(8);
+ Streamer.EmitSymbolValue(&S, 8);
+ }
+ void emitMachine(StringRef CPU) override {
+ // FIXME: Is there anything to do in here or does this directive only
+ // limit the parser?
+ }
+ void emitAbiVersion(int AbiVersion) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ Flags &= ~ELF::EF_PPC64_ABI;
+ Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
+ MCA.setELFHeaderEFlags(Flags);
+ }
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+
+ int64_t Res;
+ if (!LocalOffset->evaluateAsAbsolute(Res, MCA))
+ report_fatal_error(".localentry expression must be absolute.");
+
+ unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
+ if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
+ report_fatal_error(".localentry expression cannot be encoded.");
+
+ unsigned Other = S->getOther();
+ Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+ Other |= Encoded;
+ S->setOther(Other);
+
+ // For GAS compatibility, unless we already saw a .abiversion directive,
+ // set e_flags to indicate ELFv2 ABI.
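+    // (The EF_PPC64_ABI bits of e_flags hold the ABI version; 2 is ELFv2.)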
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ if ((Flags & ELF::EF_PPC64_ABI) == 0)
+ MCA.setELFHeaderEFlags(Flags | 2);
+ }
+ void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
+ auto *Symbol = cast<MCSymbolELF>(S);
+ // When encoding an assignment to set symbol A to symbol B, also copy
+ // the st_other bits encoding the local entry point offset.
+ if (Value->getKind() != MCExpr::SymbolRef)
+ return;
+ const auto &RhsSym = cast<MCSymbolELF>(
+ static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
+ unsigned Other = Symbol->getOther();
+ Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+ Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
+ Symbol->setOther(Other);
+ }
+};
+
+class PPCTargetMachOStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ void emitTCEntry(const MCSymbol &S) override {
+ llvm_unreachable("Unknown pseudo-op: .tc");
+ }
+ void emitMachine(StringRef CPU) override {
+ // FIXME: We should update the CPUType, CPUSubType in the Object file if
+ // the new values are different from the defaults.
+ }
+ void emitAbiVersion(int AbiVersion) override {
+ llvm_unreachable("Unknown pseudo-op: .abiversion");
+ }
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+ llvm_unreachable("Unknown pseudo-op: .localentry");
+ }
+};
+}
+
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new PPCTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new PPCTargetELFStreamer(S);
+ return new PPCTargetMachOStreamer(S);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin());
+}
+
+extern "C" void LLVMInitializePowerPCTargetMC() {
+ for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 0000000..77fe458
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,104 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class Triple;
+class StringRef;
+class raw_pwrite_stream;
+class raw_ostream;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+extern Target ThePPC64LETarget;
+
+MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU);
+
+/// Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since all 1s are not contiguous.
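+/// For example, Val = 0x00FF0000 yields MB = 8 and ME = 15, with bits
+/// numbered from the most significant bit as in the PowerPC rotate-and-mask
+/// instructions.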
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
+} // End llvm namespace
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
new file mode 100644
index 0000000..b54a0e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -0,0 +1,383 @@
+//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+ bool recordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue);
+
+ void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+ void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override {
+ if (Writer->is64Bit()) {
+ report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
+ } else
+ RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+/// Computes the log2 of the size of the relocation,
+/// used for relocation_info::r_length.
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ report_fatal_error("log2size(FixupKind): Unhandled fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return 0;
+ case FK_PCRel_2:
+ case FK_Data_2:
+ return 1;
+ case FK_PCRel_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_br24:
+ case FK_Data_4:
+ return 2;
+ case FK_PCRel_8:
+ case FK_Data_8:
+ return 3;
+ }
+ return 0;
+}
+
+/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
+/// Outline based on PPCELFObjectWriter::GetRelocType().
+static unsigned getRelocType(const MCValue &Target,
+ const MCFixupKind FixupKind, // from
+ // Fixup.getKind()
+ const bool IsPCRel) {
+ const MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+ // determine the type of the relocation
+ unsigned Type = MachO::GENERIC_RELOC_VANILLA;
+ if (IsPCRel) { // relative to PC
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (relative)");
+ case PPC::fixup_ppc_br24:
+ Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24
+ break;
+ case PPC::fixup_ppc_brcond14:
+ Type = MachO::PPC_RELOC_BR14;
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (absolute)!");
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16_SECTDIFF;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ break;
+ case FK_Data_2:
+ break;
+ }
+ }
+ return Type;
+}
+
+static void makeRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t FixupOffset, const uint32_t Index,
+ const unsigned IsPCRel, const unsigned Log2Size,
+ const unsigned IsExtern, const unsigned Type) {
+ MRE.r_word0 = FixupOffset;
+ // The bitfield offsets that work (as determined by trial-and-error)
+  // are different from what is documented in the Mach-O manuals.
+ // This appears to be an endianness issue; reversing the order of the
+ // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+ // breaks x86/ARM assembly).
+ MRE.r_word1 = ((Index << 8) | // was << 0
+ (IsPCRel << 7) | // was << 24
+ (Log2Size << 5) | // was << 25
+ (IsExtern << 4) | // was << 27
+ (Type << 0)); // was << 28
+}
+
+static void
+makeScatteredRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t Addr, const unsigned Type,
+ const unsigned Log2Size, const unsigned IsPCRel,
+ const uint32_t Value2) {
+ // For notes on bitfield positions and endianness, see:
+ // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
+ MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) |
+ (IsPCRel << 30) | MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
+}
+
+/// Compute fixup offset (address).
+static uint32_t getFixupOffset(const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+  // On Mach-O, fixup_ppc_half16 relocations must refer to the start of the
+  // instruction, not the second halfword as on ELF.
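+  // (On big-endian the half16 fixup offset points two bytes into the word, so
+  // clearing the low two bits rebases it on the instruction.)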
+ if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+ FixupOffset &= ~uint32_t(3);
+ return FixupOffset;
+}
+
+/// \return false if falling back to using non-scattered relocation,
+/// otherwise true for normal scattered relocation.
+/// based on X86MachObjectWriter::recordScatteredRelocation
+/// and ARMMachObjectWriter::recordScatteredRelocation
+bool PPCMachObjectWriter::recordScatteredRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue) {
+ // caller already computes these, can we just pass and reuse?
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ const MCFixupKind FK = Fixup.getKind();
+ const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned Type = getRelocType(Target, FK, IsPCRel);
+
+ // Is this a local or SECTDIFF relocation entry?
+ // SECTDIFF relocation entries have symbol subtractions,
+ // and require two entries, the first for the add-symbol value,
+ // the second for the subtract-symbol value.
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+
+ if (!A->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(*A, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ const MCSymbol *SB = &B->getSymbol();
+
+ if (!SB->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // FIXME: is Type correct? see include/llvm/Support/MachO.h
+ Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
+ FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
+ }
+ // FIXME: does FixedValue get used??
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == MachO::PPC_RELOC_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
+ // X86 had this piece, but ARM does not
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().reportError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") +
+ Buffer + ") into 24 bits of scattered "
+ "relocation entry.");
+ return false;
+ }
+
+ // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
+ // see PPCMCExpr::evaluateAsRelocatableImpl()
+ uint32_t other_half = 0;
+ switch (Type) {
+ case MachO::PPC_RELOC_LO16_SECTDIFF:
+ other_half = (FixedValue >> 16) & 0xffff;
+      // applyFixupOffset no longer extracts the high part because it now
+      // assumes this was already done. That does not appear to hold for the
+      // FixedValue needed with Mach-O relocs, so adjust FixedValue again here.
+ FixedValue &= 0xffff;
+ break;
+ case MachO::PPC_RELOC_HA16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue =
+ ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff;
+ break;
+ case MachO::PPC_RELOC_HI16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue = (FixedValue >> 16) & 0xffff;
+ break;
+ default:
+ llvm_unreachable("Invalid PPC scattered relocation type.");
+ break;
+ }
+
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
+ Log2Size, IsPCRel, Value2);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
+ }
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
+ Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
+ return true;
+}
+
+// see PPCELFObjectWriter for a general outline of cases
+void PPCMachObjectWriter::RecordPPCRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ const MCFixupKind FK = Fixup.getKind(); // unsigned
+ const unsigned Log2Size = getFixupKindLog2Size(FK);
+ const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned RelocType = getRelocType(Target, FK, IsPCRel);
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB() &&
+ // Q: are branch targets ever scattered?
+ RelocType != MachO::PPC_RELOC_BR24 &&
+ RelocType != MachO::PPC_RELOC_BR14) {
+ recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ Log2Size, FixedValue);
+ return;
+ }
+
+ // this doesn't seem right for RIT_PPC_BR24
+ // Get the symbol data, if any.
+ const MCSymbol *A = nullptr;
+ if (Target.getSymA())
+ A = &Target.getSymA()->getSymbol();
+
+ // See <reloc.h>.
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ unsigned Index = 0;
+ unsigned Type = RelocType;
+
+ const MCSymbol *RelSymbol = nullptr;
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+    // Carried over from the ARM writer; not verified for PPC.
+ } else {
+ // Resolve constant variables.
+ if (A->isVariable()) {
+ int64_t Res;
+ if (A->getVariableValue()->evaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(*A)) {
+ RelSymbol = A;
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!A->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(*A);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSection &Sec = A->getSection();
+ Index = Sec.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&Sec);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+ }
+
+ // struct relocation_info (8 bytes)
+ MachO::any_relocation_info MRE;
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
+ Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(
+ new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
+ /*IsLittleEndian=*/false);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
new file mode 100644
index 0000000..c2987b6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -0,0 +1,86 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ case PPC::PRED_EQ_MINUS: return PPC::PRED_NE_PLUS;
+ case PPC::PRED_NE_MINUS: return PPC::PRED_EQ_PLUS;
+ case PPC::PRED_LT_MINUS: return PPC::PRED_GE_PLUS;
+ case PPC::PRED_GE_MINUS: return PPC::PRED_LT_PLUS;
+ case PPC::PRED_GT_MINUS: return PPC::PRED_LE_PLUS;
+ case PPC::PRED_LE_MINUS: return PPC::PRED_GT_PLUS;
+ case PPC::PRED_NU_MINUS: return PPC::PRED_UN_PLUS;
+ case PPC::PRED_UN_MINUS: return PPC::PRED_NU_PLUS;
+ case PPC::PRED_EQ_PLUS: return PPC::PRED_NE_MINUS;
+ case PPC::PRED_NE_PLUS: return PPC::PRED_EQ_MINUS;
+ case PPC::PRED_LT_PLUS: return PPC::PRED_GE_MINUS;
+ case PPC::PRED_GE_PLUS: return PPC::PRED_LT_MINUS;
+ case PPC::PRED_GT_PLUS: return PPC::PRED_LE_MINUS;
+ case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS;
+ case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS;
+ case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS;
+
+ // Simple predicates for single condition-register bits.
+ case PPC::PRED_BIT_SET: return PPC::PRED_BIT_UNSET;
+ case PPC::PRED_BIT_UNSET: return PPC::PRED_BIT_SET;
+ }
+ llvm_unreachable("Unknown PPC branch opcode!");
+}
+
+PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ case PPC::PRED_EQ: return PPC::PRED_EQ;
+ case PPC::PRED_NE: return PPC::PRED_NE;
+ case PPC::PRED_LT: return PPC::PRED_GT;
+ case PPC::PRED_GE: return PPC::PRED_LE;
+ case PPC::PRED_GT: return PPC::PRED_LT;
+ case PPC::PRED_LE: return PPC::PRED_GE;
+ case PPC::PRED_NU: return PPC::PRED_NU;
+ case PPC::PRED_UN: return PPC::PRED_UN;
+ case PPC::PRED_EQ_MINUS: return PPC::PRED_EQ_MINUS;
+ case PPC::PRED_NE_MINUS: return PPC::PRED_NE_MINUS;
+ case PPC::PRED_LT_MINUS: return PPC::PRED_GT_MINUS;
+ case PPC::PRED_GE_MINUS: return PPC::PRED_LE_MINUS;
+ case PPC::PRED_GT_MINUS: return PPC::PRED_LT_MINUS;
+ case PPC::PRED_LE_MINUS: return PPC::PRED_GE_MINUS;
+ case PPC::PRED_NU_MINUS: return PPC::PRED_NU_MINUS;
+ case PPC::PRED_UN_MINUS: return PPC::PRED_UN_MINUS;
+ case PPC::PRED_EQ_PLUS: return PPC::PRED_EQ_PLUS;
+ case PPC::PRED_NE_PLUS: return PPC::PRED_NE_PLUS;
+ case PPC::PRED_LT_PLUS: return PPC::PRED_GT_PLUS;
+ case PPC::PRED_GE_PLUS: return PPC::PRED_LE_PLUS;
+ case PPC::PRED_GT_PLUS: return PPC::PRED_LT_PLUS;
+ case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS;
+ case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS;
+ case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS;
+
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
+ }
+ llvm_unreachable("Unknown PPC branch opcode!");
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
new file mode 100644
index 0000000..acea600
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -0,0 +1,76 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
+
+// GCC #defines PPC on Linux, and it may be predefined on other hosts, but
+// generated files use "namespace PPC", so undefine it here to avoid a clash.
+#undef PPC
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4,
+ PRED_LT_MINUS = (0 << 5) | 14,
+ PRED_LE_MINUS = (1 << 5) | 6,
+ PRED_EQ_MINUS = (2 << 5) | 14,
+ PRED_GE_MINUS = (0 << 5) | 6,
+ PRED_GT_MINUS = (1 << 5) | 14,
+ PRED_NE_MINUS = (2 << 5) | 6,
+ PRED_UN_MINUS = (3 << 5) | 14,
+ PRED_NU_MINUS = (3 << 5) | 6,
+ PRED_LT_PLUS = (0 << 5) | 15,
+ PRED_LE_PLUS = (1 << 5) | 7,
+ PRED_EQ_PLUS = (2 << 5) | 15,
+ PRED_GE_PLUS = (0 << 5) | 7,
+ PRED_GT_PLUS = (1 << 5) | 15,
+ PRED_NE_PLUS = (2 << 5) | 7,
+ PRED_UN_PLUS = (3 << 5) | 15,
+ PRED_NU_PLUS = (3 << 5) | 7,
+
+ // When dealing with individual condition-register bits, we have simple set
+ // and unset predicates.
+ PRED_BIT_SET = 1024,
+ PRED_BIT_UNSET = 1025
+ };
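+
+ // Illustrative note (not part of the original header): each compare
+ // predicate above packs the CR bit index (BI) above the 5-bit BO field,
+ // so both pieces can be recovered as, e.g.:
+ //   unsigned BO = Pred & 0x1F;  // PRED_LT -> 12 (branch if the CR bit is set)
+ //   unsigned BI = Pred >> 5;    // PRED_LT -> 0  (the LT bit of the CR field)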
+
+ // Bits encoding the branch taken (plus) or not-taken (minus) hint.
+ enum BranchHintBit {
+ BR_NO_HINT = 0x0,
+ BR_NONTAKEN_HINT = 0x2,
+ BR_TAKEN_HINT = 0x3,
+ BR_HINT_MASK = 0x3
+ };
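+
+ // Illustrative note (not part of the original header): the "+"/"-" predicate
+ // variants above are the base BO value with a hint OR'ed into its low bits,
+ // e.g. PRED_LT       = (0 << 5) | 12  (BR_NO_HINT),
+ //      PRED_LT_MINUS = (0 << 5) | 14  (12 | BR_NONTAKEN_HINT),
+ //      PRED_LT_PLUS  = (0 << 5) | 15  (12 | BR_TAKEN_HINT).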
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+
+ /// Assume the condition register is set by MI(a, b); return the predicate to
+ /// use if the instruction is changed so that the condition register is set by
+ /// MI(b, a) instead.
+ Predicate getSwappedPredicate(Predicate Opcode);
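+ // For example (illustrative, not part of the original header): if a branch
+ // tests PRED_LT on "cmpw cr0, a, b" and the compare is rewritten as
+ // "cmpw cr0, b, a", the branch must test getSwappedPredicate(PRED_LT),
+ // i.e. PRED_GT.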
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
new file mode 100644
index 0000000..a259ed3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,106 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
+#define LLVM_LIB_TARGET_POWERPC_PPC_H
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+ class PPCTargetMachine;
+ class PassRegistry;
+ class FunctionPass;
+ class ImmutablePass;
+ class MachineInstr;
+ class AsmPrinter;
+ class MCInst;
+
+ FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
+#ifndef NDEBUG
+ FunctionPass *createPPCCTRLoopsVerify();
+#endif
+ FunctionPass *createPPCLoopDataPrefetchPass();
+ FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM);
+ FunctionPass *createPPCTOCRegDepsPass();
+ FunctionPass *createPPCEarlyReturnPass();
+ FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXFMAMutatePass();
+ FunctionPass *createPPCVSXSwapRemovalPass();
+ FunctionPass *createPPCMIPeepholePass();
+ FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCTLSDynamicCallPass();
+ FunctionPass *createPPCBoolRetToIntPass();
+ void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin);
+
+ void initializePPCVSXFMAMutatePass(PassRegistry&);
+ void initializePPCBoolRetToIntPass(PassRegistry&);
+ extern char &PPCVSXFMAMutateID;
+
+ namespace PPCII {
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // PPC Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is
+ /// used for calls and jumps to external functions on Tiger and earlier, and
+ /// for PIC calls on Linux and ELF systems.
+ MO_PLT_OR_STUB = 1,
+
+ /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+ /// the function's picbase, e.g. lo16(symbol-picbase).
+ MO_PIC_FLAG = 2,
+
+ /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+ /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+ MO_NLP_FLAG = 4,
+
+ /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+ /// symbol with hidden visibility. This causes a different kind of
+ /// non-lazy-pointer to be generated.
+ MO_NLP_HIDDEN_FLAG = 8,
+
+ /// The following are not bit flags: MO_ACCESS_MASK selects a field whose
+ /// values (below) identify the kind of access.
+ MO_ACCESS_MASK = 0xf0,
+
+ /// MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
+ MO_LO = 1 << 4,
+ MO_HA = 2 << 4,
+
+ MO_TPREL_LO = 4 << 4,
+ MO_TPREL_HA = 3 << 4,
+
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL_LO = 5 << 4,
+ MO_TLSLD_LO = 6 << 4,
+ MO_TOC_LO = 7 << 4,
+
+ // Symbol for VK_PPC_TLS fixup attached to an ADD instruction
+ MO_TLS = 8 << 4
+ };
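+
+ // Illustrative sketch (not part of the original header): an operand's flags
+ // combine one access-kind value with the low flag bits, e.g.
+ //   unsigned Flags  = PPCII::MO_LO | PPCII::MO_PIC_FLAG; // lo16(sym-picbase)
+ //   unsigned Access = Flags & PPCII::MO_ACCESS_MASK;     // == PPCII::MO_LO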
+ } // end namespace PPCII
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
new file mode 100644
index 0000000..b03be12
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,424 @@
+//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
+def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E500mc", "">;
+def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
+def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
+def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software emulation for floating point">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
+ "Use condition-register bits individually">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true",
+ "Enable SPE instructions">;
+def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
+ "Enable the MFOCRF instruction">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
+ "Enable the fcpsgn instruction">;
+def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
+ "Enable the fre instruction">;
+def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
+ "Enable the fres instruction">;
+def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+ "Enable the frsqrte instruction">;
+def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+ "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+ "Assume higher precision reciprocal estimates">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
+def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
+ "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
+def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
+ "Enable the isel instruction">;
+def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+ "Enable the popcnt[dw] instructions">;
+def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true",
+ "Enable the bpermd instruction">;
+def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true",
+ "Enable extended divide instructions">;
+def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+ "Enable the ldbrx instruction">;
+def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
+ "Enable the cmpb instruction">;
+def FeatureICBT : SubtargetFeature<"icbt","HasICBT", "true",
+ "Enable icbt instruction">;
+def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
+ "Enable Book E instructions",
+ [FeatureICBT]>;
+def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
+ "Has only the msync instruction instead of sync",
+ [FeatureBookE]>;
+def FeatureE500 : SubtargetFeature<"e500", "IsE500", "true",
+ "Enable E500/E500mc instructions">;
+def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
+ "Enable PPC 4xx instructions">;
+def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
+ "Enable PPC 6xx instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
+ "Enable VSX instructions",
+ [FeatureAltivec]>;
+def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
+ "Enable POWER8 Altivec instructions",
+ [FeatureAltivec]>;
+def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
+ "Enable POWER8 Crypto instructions",
+ [FeatureP8Altivec]>;
+def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
+ "Enable POWER8 vector instructions",
+ [FeatureVSX, FeatureP8Altivec]>;
+def FeatureDirectMove :
+ SubtargetFeature<"direct-move", "HasDirectMove", "true",
+ "Enable Power8 direct move instructions",
+ [FeatureVSX]>;
+def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
+ "HasPartwordAtomics", "true",
+ "Enable l[bh]arx and st[bh]cx.">;
+def FeatureInvariantFunctionDescriptors :
+ SubtargetFeature<"invariant-function-descriptors",
+ "HasInvariantFunctionDescriptors", "true",
+ "Assume function descriptors are invariant">;
+def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
+ "Enable Hardware Transactional Memory instructions">;
+def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
+ "Implement mftb using the mfspr instruction">;
+def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
+ "Target supports add/load integer fusion.">;
+def FeatureFloat128 :
+ SubtargetFeature<"float128", "HasFloat128", "true",
+ "Enable the __float128 data type for IEEE-754R Binary128.",
+ [FeatureVSX]>;
+
+def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
+ "Treat vector data stream cache control instructions as deprecated">;
+
+/* Since new processors generally contain a superset of the features of those
+   that came before them, defining feature lists this way makes implementations
+   of new processors less error prone and easier to read.
+   Namely:
+     list<SubtargetFeature> Power8FeatureList = ...
+     list<SubtargetFeature> FutureProcessorSpecificFeatureList =
+       [ features that Power8 does not support ]
+     list<SubtargetFeature> FutureProcessorFeatureList =
+       !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+
+   This makes it explicit and obvious what is new in FutureProcessor compared
+   to Power8, and provides a single point of definition if the feature set is
+   used elsewhere.
+*/
+def ProcessorFeatures {
+ list<SubtargetFeature> Power7FeatureList =
+ [DirectivePwr7, FeatureAltivec, FeatureVSX,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureBPERMD, FeatureExtDiv,
+ FeatureMFTB, DeprecatedDST];
+ list<SubtargetFeature> Power8SpecificFeatures =
+ [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
+ FeatureFusion];
+ list<SubtargetFeature> Power8FeatureList =
+ !listconcat(Power7FeatureList, Power8SpecificFeatures);
+}
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// DFP p6, p6x, p7 decimal floating-point instructions
+// POPCNTB p5 through p7 popcntb and related instructions
+
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// RecFormRel - Filter class used to relate non-record-form instructions with
+// their record-form variants.
+class RecFormRel;
+
+// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX
+// FMA instruction forms with their corresponding factor-killing forms.
+class AltVSXFMARel {
+ bit IsVSXFMAAlt = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Relation Map Definitions.
+//===----------------------------------------------------------------------===//
+
+def getRecordFormOpcode : InstrMapping {
+ let FilterClass = "RecFormRel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName", "Interpretation64Bit"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["RC"];
+ // The key column holds the non-record-form (RC = 0) instructions.
+ let KeyCol = ["0"];
+ // The value column holds the record-form (RC = 1) instructions.
+ let ValueCols = [["1"]];
+}
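+
+// Illustrative note (not part of the original file): instructions that share a
+// BaseName occupy one row, e.g. a plain add and its record-form ("o"-suffixed)
+// counterpart; this map takes the RC = 0 entry as the key and returns the
+// RC = 1 entry, so querying the plain opcode yields the record-form opcode.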
+
+def getNonRecordFormOpcode : InstrMapping {
+ let FilterClass = "RecFormRel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName", "Interpretation64Bit"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["RC"];
+ // The key column holds the record-form (RC = 1) instructions.
+ let KeyCol = ["1"];
+ // The value column holds the non-record-form (RC = 0) instructions.
+ let ValueCols = [["0"]];
+}
+
+def getAltVSXFMAOpcode : InstrMapping {
+ let FilterClass = "AltVSXFMARel";
+ // Instructions with the same BaseName value form a row.
+ let RowFields = ["BaseName"];
+ // Instructions with the same IsVSXFMAAlt value form a column.
+ let ColFields = ["IsVSXFMAAlt"];
+ // The key column holds the (default) addend-killing instructions.
+ let KeyCol = ["0"];
+ // The value column holds the factor-killing (IsVSXFMAAlt = 1) instructions.
+ let ValueCols = [["1"]];
+}
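+
+// Illustrative note (not part of the original file): e.g. an addend-killing
+// A-form VSX FMA such as XSMADDADP (IsVSXFMAAlt = 0) pairs with its
+// factor-killing M-form counterpart XSMADDMDP (IsVSXFMAAlt = 1), letting a
+// pass choose which source register the FMA overwrites.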
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32, FeatureMFTB]>;
+def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureICBT, FeatureBookE,
+ FeatureMSYNC, FeatureMFTB]>;
+def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureICBT, FeatureBookE,
+ FeatureMSYNC, FeatureMFTB]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602,
+ FeatureMFTB]>;
+def : Processor<"603", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureMFTB]>;
+
+def : ProcessorModel<"970", G5Model,
+ [Directive970, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureMFTB]>;
+def : ProcessorModel<"g5", G5Model,
+ [Directive970, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureFRES, FeatureFRSQRTE,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureMFTB, DeprecatedDST]>;
+def : ProcessorModel<"e500mc", PPCE500mcModel,
+ [DirectiveE500mc,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureISEL, FeatureMFTB]>;
+def : ProcessorModel<"e5500", PPCE5500Model,
+ [DirectiveE5500, FeatureMFOCRF, Feature64Bit,
+ FeatureSTFIWX, FeatureICBT, FeatureBookE,
+ FeatureISEL, FeatureMFTB]>;
+def : ProcessorModel<"a2", PPCA2Model,
+ [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureMFTB]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+ [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureQPX, FeatureMFTB]>;
+def : ProcessorModel<"pwr3", G5Model,
+ [DirectivePwr3, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr4", G5Model,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+ FeatureSTFIWX, Feature64Bit, FeatureMFTB]>;
+def : ProcessorModel<"pwr5", G5Model,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, Feature64Bit,
+ FeatureMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr5x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit,
+ FeatureMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr6", G5Model,
+ [DirectivePwr6, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
+ FeatureMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr6x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
+ FeatureFPRND, Feature64Bit,
+ FeatureMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
+def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
+def : Processor<"ppc", G3Itineraries, [Directive32, FeatureMFTB]>;
+def : ProcessorModel<"ppc64", G5Model,
+ [Directive64, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureMFTB]>;
+def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>;
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+
+ // FIXME: Unset this when no longer needed!
+ let decodePositionallyEncodedOperands = 1;
+
+ let noNamedPositionallyEncodedOperands = 1;
+}
+
+def PPCAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+def PPCAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // We do not use hard coded registers in asm strings. However, some
+ // InstAlias definitions use immediate literals. Set RegisterPrefix
+ // so that those are not misinterpreted as registers.
+ string RegisterPrefix = "%";
+ string BreakCharacters = ".";
+}
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+
+ let AssemblyParsers = [PPCAsmParser];
+ let AssemblyParserVariants = [PPCAsmParserVariant];
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 0000000..9a63c14
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,1565 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asmprinter"
+
+namespace {
+class PPCAsmPrinter : public AsmPrinter {
+protected:
+ MapVector<MCSymbol *, MCSymbol *> TOC;
+ const PPCSubtarget *Subtarget;
+ StackMaps SM;
+
+public:
+ explicit PPCAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
+
+ const char *getPassName() const override {
+ return "PowerPC Assembly Printer";
+ }
+
+ MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
+
+ void EmitInstruction(const MachineInstr *MI) override;
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
+ void EmitEndOfAsmFile(Module &M) override;
+
+ void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+ void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
+ void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<PPCSubtarget>();
+ return AsmPrinter::runOnMachineFunction(MF);
+ }
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
+
+ const char *getPassName() const override {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
+
+ void EmitFunctionEntryLabel() override;
+
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
+
+ const char *getPassName() const override {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
+
+ void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+ };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used for Linux assembly output.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'q': // for QPX
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
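+
+// Illustrative examples (not part of the original file):
+//   stripRegisterPrefix("r3")   -> "3"
+//   stripRegisterPrefix("cr7")  -> "7"
+//   stripRegisterPrefix("vs12") -> "12"  ('v' followed by 's' drops two chars)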
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const DataLayout &DL = getDataLayout();
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+ // The Linux assembler (and possibly others) does not accept register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget->isDarwin())
+ RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ MO.getMBB()->getSymbol()->print(O, MAI);
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+ return;
+ case MachineOperand::MO_BlockAddress:
+ GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static &&
+ !GV->isStrongDefinitionForLinker()) {
+ if (!GV->hasHiddenVisibility()) {
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+ SymToPrint);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage()) {
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(
+ SymToPrint);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = getSymbol(GV);
+ }
+ } else {
+ SymToPrint = getSymbol(GV);
+ }
+
+ SymToPrint->print(O, MAI);
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << (unsigned)MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "$" before a global var name or constant.
+ break; // PPC never has a prefix.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'y': // A memory reference for an X-form instruction
+ {
+ const char *RegName = "r0";
+ if (!Subtarget->isDarwin())
+ RegName = stripRegisterPrefix(RegName);
+ O << RegName << ", ";
+ printOperand(MI, OpNo, O);
+ return false;
+ }
+ case 'U': // Print 'u' for update form.
+ case 'X': // Print 'x' for indexed form.
+ {
+ // FIXME: Currently for PowerPC memory operands are always loaded
+ // into a register, so we never get an update or indexed form.
+ // This is bad even for offset forms, since even if we know we
+ // have a value in -16(r1), we will generate a load into r<n>
+ // and then load from 0(r<n>). Until that issue is fixed,
+ // tolerate 'U' and 'X' but don't output anything.
+ assert(MI->getOperand(OpNo).isReg());
+ return false;
+ }
+ }
+ }
+
+ assert(MI->getOperand(OpNo).isReg());
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
+/// exists for it. If not, create one. Then return a symbol that references
+/// the TOC entry.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+ MCSymbol *&TOCEntry = TOC[Sym];
+ if (!TOCEntry)
+ TOCEntry = createTempSymbol("C");
+ return TOCEntry;
+}
+
+void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ SM.serializeToStackMapSection();
+}
+
+void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
+ unsigned NumNOPBytes = MI.getOperand(1).getImm();
+
+ SM.recordStackMap(MI);
+ assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+
+ // Scan ahead to trim the shadow.
+ const MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::const_iterator MII(MI);
+ ++MII;
+ while (NumNOPBytes > 0) {
+ if (MII == MBB.end() || MII->isCall() ||
+ MII->getOpcode() == PPC::DBG_VALUE ||
+ MII->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MII->getOpcode() == TargetOpcode::STACKMAP)
+ break;
+ ++MII;
+ NumNOPBytes -= 4;
+ }
+
+ // Emit nops.
+ for (unsigned i = 0; i < NumNOPBytes; i += 4)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+}
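+
+// Illustrative note (not part of the original file): for a stackmap with a
+// 16-byte shadow followed by two ordinary 4-byte instructions and then a call,
+// the scan above counts 8 bytes of the shadow as already covered, so only two
+// NOPs are emitted.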
+
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>
+void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
+ SM.recordPatchPoint(MI);
+ PatchPointOpers Opers(&MI);
+
+ unsigned EncodedBytes = 0;
+ const MachineOperand &CalleeMO =
+ Opers.getMetaOper(PatchPointOpers::TargetPos);
+
+ if (CalleeMO.isImm()) {
+ int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+ if (CallTarget) {
+ assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
+ "High 16 bits of call target should be zero.");
+ unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+ EncodedBytes = 0;
+ // Materialize the jump address:
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8)
+ .addReg(ScratchReg)
+ .addImm((CallTarget >> 32) & 0xFFFF));
+ ++EncodedBytes;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::RLDIC)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(32).addImm(16));
+ ++EncodedBytes;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORIS8)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm((CallTarget >> 16) & 0xFFFF));
+ ++EncodedBytes;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORI8)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(CallTarget & 0xFFFF));
+ ++EncodedBytes;
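+
+ // Worked example (illustrative, not part of the original file): for a 48-bit
+ // target 0x0000123456789ABC the sequence above produces
+ //   li    r, 0x1234      -> r = 0x0000000000001234
+ //   rldic r, r, 32, 16   -> r = 0x0000123400000000
+ //   oris  r, r, 0x5678   -> r = 0x0000123456780000
+ //   ori   r, r, 0x9ABC   -> r = 0x0000123456789ABC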
+
+ // Save the current TOC pointer before the remote call.
+ int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD)
+ .addReg(PPC::X2)
+ .addImm(TOCSaveOffset)
+ .addReg(PPC::X1));
+ ++EncodedBytes;
+
+ // If we're on ELFv1, then we need to load the actual function pointer
+ // from the function descriptor.
+ if (!Subtarget->isELFv2ABI()) {
+ // Load the new TOC pointer and the function address, but not r11
+ // (needing this is rare, and loading it here would prevent passing it
+ // via a 'nest' parameter).
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addImm(8)
+ .addReg(ScratchReg));
+ ++EncodedBytes;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(ScratchReg)
+ .addImm(0)
+ .addReg(ScratchReg));
+ ++EncodedBytes;
+ }
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR8)
+ .addReg(ScratchReg));
+ ++EncodedBytes;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTRL8));
+ ++EncodedBytes;
+
+ // Restore the TOC pointer after the call.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addImm(TOCSaveOffset)
+ .addReg(PPC::X1));
+ ++EncodedBytes;
+ }
+ } else if (CalleeMO.isGlobal()) {
+ const GlobalValue *GValue = CalleeMO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext);
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL8_NOP)
+ .addExpr(SymVar));
+ EncodedBytes += 2;
+ }
+
+ // Each instruction is 4 bytes.
+ EncodedBytes *= 4;
+
+ // Emit padding.
+ unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ assert(NumBytes >= EncodedBytes &&
+ "Patchpoint can't request size less than the length of a call.");
+ assert((NumBytes - EncodedBytes) % 4 == 0 &&
+ "Invalid number of NOP bytes requested!");
+ for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+}
+
+/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
+/// call to __tls_get_addr to the current output stream.
+void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
+ MCSymbolRefExpr::VariantKind VK) {
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+
+ assert(MI->getOperand(0).isReg() &&
+ ((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
+ (!Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) &&
+ "GETtls[ld]ADDR[32] must define GPR3");
+ assert(MI->getOperand(1).isReg() &&
+ ((Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::X3) ||
+ (!Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
+ "GETtls[ld]ADDR[32] must read GPR3");
+
+ if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ Kind = MCSymbolRefExpr::VK_PLT;
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ?
+ PPC::BL8_NOP_TLS : PPC::BL_TLS)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+}
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+ bool isPPC64 = Subtarget->isPPC64();
+ bool isDarwin = TM.getTargetTriple().isOSDarwin();
+ const Module *M = MF->getFunction()->getParent();
+ PICLevel::Level PL = M->getPICLevel();
+
+ // Lower multi-instruction pseudo operations.
+ switch (MI->getOpcode()) {
+ default: break;
+ case TargetOpcode::DBG_VALUE:
+ llvm_unreachable("Should be handled target independently");
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(SM, *MI);
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(SM, *MI);
+
+ case PPC::MoveGOTtoLR: {
+ // Transform %LR = MoveGOTtoLR
+ // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
+ // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding
+ // _GLOBAL_OFFSET_TABLE_) has exactly one instruction:
+ // blrl
+ // This will return the pointer to _GLOBAL_OFFSET_TABLE_@local
+ MCSymbol *GOTSymbol =
+ OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol,
+ MCSymbolRefExpr::VK_PPC_LOCAL,
+ OutContext),
+ MCConstantExpr::create(4, OutContext),
+ OutContext);
+
+ // Emit the 'bl'.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+ return;
+ }
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8: {
+ // Transform %LR = MovePCtoLR
+ // Into this, where the label is the PIC base:
+ // bl L1$pb
+ // L1$pb:
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+ // Emit the 'bl'.
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we
+ // don't have to do a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+
+ // Emit the label.
+ OutStreamer->EmitLabel(PICBase);
+ return;
+ }
+ case PPC::UpdateGBR: {
+ // Transform %Rd = UpdateGBR(%Rt, %Ri)
+ // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri)
+ // add %Rd, %Rt, %Ri
+ // Get the offset from the GOT Base Register to the GOT
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ MCSymbol *PICOffset =
+ MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ TmpInst.setOpcode(PPC::LWZ);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::create(MF->getPICBaseSymbol(),
+ MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCOperand TR = TmpInst.getOperand(1);
+ const MCOperand PICR = TmpInst.getOperand(0);
+
+ // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
+ TmpInst.getOperand(1) =
+ MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext));
+ TmpInst.getOperand(0) = TR;
+ TmpInst.getOperand(2) = PICR;
+ EmitToStreamer(*OutStreamer, TmpInst);
+
+ TmpInst.setOpcode(PPC::ADD4);
+ TmpInst.getOperand(0) = PICR;
+ TmpInst.getOperand(1) = TR;
+ TmpInst.getOperand(2) = PICR;
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LWZtoc: {
+ // Transform %R3 = LWZtoc <ga:@min1>, %R2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to LWZ, and the global address operand to be a
+ // reference to the GOT entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LWZ);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+
+ if (PL == PICLevel::Small) {
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ } else {
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
+ OutContext);
+ Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ }
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LDtocJTI:
+ case PPC::LDtocCPT:
+ case PPC::LDtocBA:
+ case PPC::LDtoc: {
+ // Transform %X3 = LDtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to LD, and the global address operand to be a
+ // reference to the TOC entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+
+ case PPC::ADDIStocHA: {
+ // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to ADDIS8. If the global address is external, has
+ // common linkage, is a non-local function address, or is a jump table
+ // address, then generate a TOC entry and reference that. Otherwise
+ // reference the symbol directly.
+ TmpInst.setOpcode(PPC::ADDIS8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
+ "Invalid operand for ADDIStocHA!");
+ MCSymbol *MOSymbol = nullptr;
+ bool GlobalToc = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG);
+ } else if (MO.isCPI()) {
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ } else if (MO.isJTI()) {
+ MOSymbol = GetJTISymbol(MO.getIndex());
+ } else if (MO.isBlockAddress()) {
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ }
+
+ if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
+ TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LDtocL: {
+ // Transform %Xd = LDtocL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to LD. If the global address is external, has
+ // common linkage, or is a jump table address, then reference the
+ // associated TOC entry. Otherwise reference the symbol directly.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
+ "Invalid operand for LDtocL!");
+ MCSymbol *MOSymbol = nullptr;
+
+ if (MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isBlockAddress()) {
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+ else if (MO.isCPI()) {
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ if (TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+ else if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ DEBUG(
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ assert((GVFlags & PPCII::MO_NLP_FLAG) &&
+ "LDtocL used on symbol that could be accessed directly is "
+ "invalid. Must match ADDIStocHA."));
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::ADDItocL: {
+ // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to ADDI8. If the global address is external, then
+ // generate a TOC entry and reference that. Otherwise reference the
+ // symbol directly.
+ TmpInst.setOpcode(PPC::ADDI8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+ MCSymbol *MOSymbol = nullptr;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
+ DEBUG(
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ assert (
+ !(GVFlags & PPCII::MO_NLP_FLAG) &&
+ "Interposable definitions must use indirect access."));
+ MOSymbol = getSymbol(GV);
+ } else if (MO.isCPI()) {
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ }
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::ADDISgotTprelHA: {
+ // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tprel@ha
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTprel =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
+ OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTprel));
+ return;
+ }
+ case PPC::LDgotTprelL:
+ case PPC::LDgotTprelL32: {
+ // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to LD.
+ TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
+ const MachineOperand &MO = MI->getOperand(1);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+
+ case PPC::PPC32PICGOT: {
+ MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTRef = OutContext.createTempSymbol();
+ MCSymbol *NextInstr = OutContext.createTempSymbol();
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::create(NextInstr, OutContext)));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, OutContext),
+ MCSymbolRefExpr::create(GOTRef, OutContext),
+ OutContext);
+ OutStreamer->EmitLabel(GOTRef);
+ OutStreamer->EmitValue(OffsExpr, 4);
+ OutStreamer->EmitLabel(NextInstr);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LWZ)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD4)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+ case PPC::PPC32GOT: {
+ MCSymbol *GOTSymbol =
+ OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *SymGotTlsL = MCSymbolRefExpr::create(
+ GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext);
+ const MCExpr *SymGotTlsHA = MCSymbolRefExpr::create(
+ GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsL));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsHA));
+ return;
+ }
+ case PPC::ADDIStlsgdHA: {
+ // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
+ OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::ADDItlsgdL:
+ // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
+ case PPC::ADDItlsgdL32: {
+ // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsgd
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create(
+ MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
+ OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::GETtlsADDR:
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
+ case PPC::GETtlsADDR32: {
+ // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
+ return;
+ }
+ case PPC::ADDIStlsldHA: {
+ // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+ assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
+ OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::ADDItlsldL:
+ // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
+ case PPC::ADDItlsldL32: {
+ // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsld
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create(
+ MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
+ OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR:
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
+ case PPC::GETtlsldADDR32: {
+ // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
+ EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
+ return;
+ }
+ case PPC::ADDISdtprelHA:
+ // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym>
+ // Into: %Xd = ADDIS8 %Xs, sym@dtprel@ha
+ case PPC::ADDISdtprelHA32: {
+ // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDIS %Rs, sym@dtprel@ha
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
+ OutContext);
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::ADDIdtprelL:
+ // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
+ case PPC::ADDIdtprelL32: {
+ // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@dtprel@l
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
+ OutContext);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::MFOCRF:
+ case PPC::MFOCRF8:
+ if (!Subtarget->hasMFOCRF()) {
+ // Transform: %R3 = MFOCRF %CR7
+ // Into: %R3 = MFCR ;; cr7
+ unsigned NewOpcode =
+ MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
+ OutStreamer->AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(NewOpcode)
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+ break;
+ case PPC::MTOCRF:
+ case PPC::MTOCRF8:
+ if (!Subtarget->hasMFOCRF()) {
+ // Transform: %CR7 = MTOCRF %R3
+ // Into: MTCRF mask, %R3 ;; cr7
+ unsigned NewOpcode =
+ MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8;
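+      // The mtcrf mask has one bit per CR field, with the most significant
+      // bit selecting CR0, so shifting 0x80 right by the CR field's encoding
+      // selects exactly that field.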
+ unsigned Mask = 0x80 >> OutContext.getRegisterInfo()
+ ->getEncodingValue(MI->getOperand(0).getReg());
+ OutStreamer->AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(0).getReg()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(NewOpcode)
+ .addImm(Mask)
+ .addReg(MI->getOperand(1).getReg()));
+ return;
+ }
+ break;
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::LWA_32:
+ case PPC::LWA: {
+ // Verify alignment is legal, so we don't create relocations
+ // that can't be supported.
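+    // LD, STD, and LWA are DS-form instructions: the low two bits of their
+    // displacement are implied to be zero, so a directly referenced global
+    // must be at least word-aligned.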
+ // FIXME: This test is currently disabled for Darwin. The test
+ // suite shows a handful of test cases that fail this check for
+ // Darwin. Those need to be investigated before this sanity test
+ // can be enabled for those subtargets.
+ if (!Subtarget->isDarwin()) {
+ unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
+ llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
+ }
+ // Now process the instruction normally.
+ break;
+ }
+ }
+
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
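+    // Emit an .abiversion 2 directive so the object is marked as ELFv2 and
+    // tools apply the ELFv2 rules (such as local/global entry points) to it.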
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+
+ if (TS)
+ TS->emitAbiVersion(2);
+ }
+
+ if (static_cast<const PPCTargetMachine &>(TM).isPPC64() ||
+ TM.getRelocationModel() != Reloc::PIC_)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ if (M.getPICLevel() == PICLevel::Small)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC));
+
+ MCSymbol *TOCSym = OutContext.getOrCreateSymbol(Twine(".LTOC"));
+ MCSymbol *CurrentPos = OutContext.createTempSymbol();
+
+ OutStreamer->EmitLabel(CurrentPos);
+
+  // The GOT pointer points to the middle of the GOT so that signed 16-bit
+  // offsets can reference the entire 64kB range; 0x8000 is the midpoint.
+ const MCExpr *tocExpr =
+ MCBinaryExpr::createAdd(MCSymbolRefExpr::create(CurrentPos, OutContext),
+ MCConstantExpr::create(0x8000, OutContext),
+ OutContext);
+
+ OutStreamer->EmitAssignment(TOCSym, tocExpr);
+
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+ // linux/ppc32 - Normal entry label.
+ if (!Subtarget->isPPC64() &&
+ (TM.getRelocationModel() != Reloc::PIC_ ||
+ MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ if (!Subtarget->isPPC64()) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+ if (PPCFI->usesPICBase()) {
+ MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ OutStreamer->EmitLabel(RelocSymbol);
+
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
+ OutContext),
+ MCSymbolRefExpr::create(PICBase, OutContext),
+ OutContext);
+ OutStreamer->EmitValue(OffsExpr, 4);
+ OutStreamer->EmitLabel(CurrentFnSym);
+ return;
+ } else
+ return AsmPrinter::EmitFunctionEntryLabel();
+ }
+
+ // ELFv2 ABI - Normal entry label.
+ if (Subtarget->isELFv2ABI())
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ // Emit an official procedure descriptor.
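+  // An ELFv1 procedure descriptor is three doublewords: the function's entry
+  // point address, the TOC base (.TOC.), and an environment pointer (emitted
+  // here as zero).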
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ MCSectionELF *Section = OutStreamer->getContext().getELFSection(
+ ".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer->SwitchSection(Section);
+ OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->EmitValueToAlignment(8);
+ MCSymbol *Symbol1 = CurrentFnSymForSize;
+ // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
+ // entry point.
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(Symbol1, OutContext),
+ 8 /*size*/);
+ MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+ // Generates a R_PPC64_TOC relocation for TOC base insertion.
+ OutStreamer->EmitValue(
+ MCSymbolRefExpr::create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
+ 8/*size*/);
+ // Emit a null environment pointer.
+ OutStreamer->EmitIntValue(0, 8 /* size */);
+ OutStreamer->SwitchSection(Current.first, Current.second);
+}
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ const DataLayout &DL = getDataLayout();
+
+ bool isPPC64 = DL.getPointerSizeInBits() == 64;
+
+ PPCTargetStreamer &TS =
+ static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ if (!TOC.empty()) {
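+    // Emit a label and an entry for each symbol that was given a TOC slot:
+    // a .tc entry (via emitTCEntry) on 64-bit targets, or a plain 4-byte
+    // address in .got2 on 32-bit targets.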
+ MCSectionELF *Section;
+
+ if (isPPC64)
+ Section = OutStreamer->getContext().getELFSection(
+ ".toc", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ else
+ Section = OutStreamer->getContext().getELFSection(
+ ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer->SwitchSection(Section);
+
+ for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+ E = TOC.end(); I != E; ++I) {
+ OutStreamer->EmitLabel(I->second);
+ MCSymbol *S = I->first;
+ if (isPPC64)
+ TS.emitTCEntry(*S);
+ else
+ OutStreamer->EmitSymbolValue(S, 4);
+ }
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
+void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+ // In the ELFv2 ABI, in functions that use the TOC register, we need to
+ // provide two entry points. The ABI guarantees that when calling the
+ // local entry point, r2 is set up by the caller to contain the TOC base
+ // for this function, and when calling the global entry point, r12 is set
+ // up by the caller to hold the address of the global entry point. We
+ // thus emit a prefix sequence along the following lines:
+ //
+ // func:
+ // # global entry point
+ // addis r2,r12,(.TOC.-func)@ha
+ // addi r2,r2,(.TOC.-func)@l
+ // .localentry func, .-func
+ // # local entry point, followed by function body
+ //
+ // This ensures we have r2 set up correctly while executing the function
+ // body, no matter which entry point is called.
+ if (Subtarget->isELFv2ABI()
+ // Only do all that if the function uses r2 in the first place.
+ && !MF->getRegInfo().use_empty(PPC::X2)) {
+
+ MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(GlobalEntryLabel);
+ const MCSymbolRefExpr *GlobalEntryLabelExp =
+ MCSymbolRefExpr::create(GlobalEntryLabel, OutContext);
+
+ MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
+ GlobalEntryLabelExp, OutContext);
+
+ const MCExpr *TOCDeltaHi =
+ PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
+
+ const MCExpr *TOCDeltaLo =
+ PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
+
+ MCSymbol *LocalEntryLabel = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(LocalEntryLabel);
+ const MCSymbolRefExpr *LocalEntryLabelExp =
+ MCSymbolRefExpr::create(LocalEntryLabel, OutContext);
+ const MCExpr *LocalOffsetExp =
+ MCBinaryExpr::createSub(LocalEntryLabelExp,
+ GlobalEntryLabelExp, OutContext);
+
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+
+ if (TS)
+ TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
+ }
+}
+
+/// EmitFunctionBodyEnd - Print the traceback table before the .size
+/// directive.
+///
+void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
+ // Only the 64-bit target requires a traceback table. For now,
+ // we only emit the word of zeroes that GDB requires to find
+ // the end of the function, and zeroes for the eight-byte
+ // mandatory fields.
+ // FIXME: We should fill in the eight-byte mandatory fields as described in
+ // the PPC64 ELF ABI (this is a low-priority item because GDB does not
+ // currently make use of these fields).
+ if (Subtarget->isPPC64()) {
+ OutStreamer->EmitIntValue(0, 4/*size*/);
+ OutStreamer->EmitIntValue(0, 8/*size*/);
+ }
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc440",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppcA2",
+ "ppce500mc",
+ "ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
+ "power6",
+ "power6x",
+ "power7",
+ "ppc64",
+ "ppc64le"
+ };
+
+ // Get the numerically largest directive.
+ // FIXME: How should we merge darwin directives?
+ unsigned Directive = PPC::DIR_NONE;
+ for (const Function &F : M) {
+ const PPCSubtarget &STI = TM.getSubtarget<PPCSubtarget>(F);
+ unsigned FDir = STI.getDarwinDirective();
+ Directive = Directive > FDir ? FDir : STI.getDarwinDirective();
+ if (STI.hasMFOCRF() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (STI.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (STI.isPPC64() && Directive < PPC::DIR_64)
+ Directive = PPC::DIR_64;
+ }
+
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+
+ assert(Directive < array_lengthof(CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TStreamer.emitMachine(CPUDirectives[Directive]);
+
+  // Prime text sections so they are adjacent. This reduces the likelihood that
+  // a large data or debug section causes a branch to exceed the 16M limit.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ OutStreamer->SwitchSection(
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText()));
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ OutStreamer->SwitchSection(
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText()));
+ }
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+ // Remove $stub suffix, add $lazy_ptr.
+ StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+ return Ctx.getOrCreateSymbol(NoStub + "$lazy_ptr");
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+ // Add $tmp suffix to $stub, yielding $stub$tmp.
+ return Ctx.getOrCreateSymbol(Sym->getName() + "$tmp");
+}
+
+void PPCDarwinAsmPrinter::
+EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
+ bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
+
+  // Construct a local MCSubtargetInfo and shadow EmitToStreamer here.
+  // This is because the MachineFunction won't exist (but has not yet been
+  // freed) and, since we're at the global level, we can use the default
+  // constructed subtarget.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple().str(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ auto EmitToStreamer = [&STI] (MCStreamer &S, const MCInst &Inst) {
+ S.EmitInstruction(Inst, *STI);
+ };
+
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ // .lazy_symbol_pointer
+ MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
+ // Output stubs for dynamically-linked functions
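+  // Each stub loads the word stored at its $lazy_ptr slot and jumps to it
+  // through CTR; the slot initially holds dyld_stub_binding_helper, so the
+  // first call resolves the real target through dyld, which then rewrites
+  // the slot.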
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ MCSection *StubSection = OutContext.getMachOSection(
+ "__TEXT", "__picsymbolstub1",
+ MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 32,
+ SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer->SwitchSection(StubSection);
+ EmitAlignment(4);
+
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
+
+ OutStreamer->EmitLabel(Stub);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ const MCExpr *Anon = MCSymbolRefExpr::create(AnonSymbol, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext);
+ const MCExpr *Sub =
+ MCBinaryExpr::createSub(LazyPtrExpr, Anon, OutContext);
+
+ // mflr r0
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ // bcl 20, 31, AnonSymbol
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
+ OutStreamer->EmitLabel(AnonSymbol);
+ // mflr r11
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+ const MCExpr *SubHa16 = PPCMCExpr::createHa(Sub, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::R11)
+ .addReg(PPC::R11)
+ .addExpr(SubHa16));
+ // mtlr r0
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+ // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ const MCExpr *SubLo16 = PPCMCExpr::createLo(Sub, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(SubLo16).addExpr(SubLo16)
+ .addReg(PPC::R11));
+ // mtctr r12
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR));
+
+ OutStreamer->SwitchSection(LSPSection);
+ OutStreamer->EmitLabel(LazyPtr);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
+ }
+ OutStreamer->AddBlankLine();
+ return;
+ }
+
+ MCSection *StubSection = OutContext.getMachOSection(
+ "__TEXT", "__symbol_stub1",
+ MachO::S_SYMBOL_STUBS | MachO::S_ATTR_PURE_INSTRUCTIONS, 16,
+ SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::create(LazyPtr, OutContext);
+
+ OutStreamer->SwitchSection(StubSection);
+ EmitAlignment(4);
+ OutStreamer->EmitLabel(Stub);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ // lis r11, ha16(LazyPtr)
+ const MCExpr *LazyPtrHa16 =
+ PPCMCExpr::createHa(LazyPtrExpr, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LIS)
+ .addReg(PPC::R11)
+ .addExpr(LazyPtrHa16));
+
+ // ldu r12, lo16(LazyPtr)(r11)
+ // lwzu r12, lo16(LazyPtr)(r11)
+ const MCExpr *LazyPtrLo16 =
+ PPCMCExpr::createLo(LazyPtrExpr, true, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+ .addReg(PPC::R11));
+
+ // mtctr r12
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTR));
+
+ OutStreamer->SwitchSection(LSPSection);
+ OutStreamer->EmitLabel(LazyPtr);
+ OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.getOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
+ }
+
+ OutStreamer->AddBlankLine();
+}
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
+
+ // Darwin/PPC always uses mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty())
+ EmitFunctionStubs(Stubs);
+
+ if (MAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ for (const Function *Personality : MMI->getPersonalities()) {
+ if (Personality) {
+ MCSymbol *NLPSym =
+ getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym =
+ MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
+ }
+ }
+ }
+
+ // Output stubs for dynamically-linked functions.
+ Stubs = MMIMacho.GetGVStubList();
+
+ // Output macho stubs for external and common global variables.
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer->EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer->EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(MCSym.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer->AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer->EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer->EmitValue(MCSymbolRefExpr::
+ create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer->AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+static AsmPrinter *
+createPPCAsmPrinterPass(TargetMachine &tm,
+ std::unique_ptr<MCStreamer> &&Streamer) {
+ if (tm.getTargetTriple().isMacOSX())
+ return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
+ return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
new file mode 100644
index 0000000..7920240
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -0,0 +1,253 @@
+//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting i1 values to i32 if they could be more
+// profitably allocated as GPRs rather than CRs. This pass will become totally
+// unnecessary if Register Bank Allocation and Global Instruction Selection ever
+// go upstream.
+//
+// Presently, the pass converts i1 Constants and Arguments to i32 if the
+// transitive closure of their uses includes only PHINodes, CallInsts, and
+// ReturnInsts. The rationale is that arguments are generally passed and
+// returned in GPRs rather than CRs, so casting them to i32 at the LLVM IR
+// level will actually save casts at the Machine Instruction level.
+//
+// It might be useful to expand this pass to add bit-wise operations to the list
+// of safe transitive closure types. Also, we miss some opportunities when LLVM
+// represents logical AND and OR operations with control flow rather than data
+// flow, for example by lowering the expression: return (A && B && C)
+//
+// as: return A ? (B && C) : false.
+//
+// There's code in SimplifyCFG that could be used to turn control flow into data
+// flow using SelectInsts. Selects are slow on some architectures (P7/P8), so
+// this probably isn't good in general, but for the special case of i1, the
+// Selects could be further lowered to bit operations that are fast everywhere.
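+//
+// As a rough sketch of the intended effect (not the exact IR the pass
+// emits), a function returning an i1 PHI such as
+//
+//   %r = phi i1 [ %a, %bb1 ], [ true, %bb2 ]
+//   ret i1 %r
+//
+// is rewritten so that the PHI and its incoming values are widened to i32
+// and the result is truncated back to i1 only immediately before the return.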
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+#define DEBUG_TYPE "bool-ret-to-int"
+
+STATISTIC(NumBoolRetPromotion,
+ "Number of times a bool feeding a RetInst was promoted to an int");
+STATISTIC(NumBoolCallPromotion,
+ "Number of times a bool feeding a CallInst was promoted to an int");
+STATISTIC(NumBoolToIntPromotion,
+ "Total number of times a bool was promoted to an int");
+
+class PPCBoolRetToInt : public FunctionPass {
+
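+  // Collect V together with the transitive closure of the operands feeding
+  // it; these are the defs that need an i32 counterpart when a use of V is
+  // promoted.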
+ static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
+ SmallPtrSet<Value *, 8> Defs;
+ SmallVector<Value *, 8> WorkList;
+ WorkList.push_back(V);
+ Defs.insert(V);
+ while (!WorkList.empty()) {
+ Value *Curr = WorkList.back();
+ WorkList.pop_back();
+ if (User *CurrUser = dyn_cast<User>(Curr))
+ for (auto &Op : CurrUser->operands())
+ if (Defs.insert(Op).second)
+ WorkList.push_back(Op);
+ }
+ return Defs;
+ }
+
+  // Translate an i1 value to an equivalent i32 value:
+ static Value *translate(Value *V) {
+ Type *Int32Ty = Type::getInt32Ty(V->getContext());
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getZExt(C, Int32Ty);
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ // Temporarily set the operands to 0. We'll fix this later in
+ // runOnUse.
+ Value *Zero = Constant::getNullValue(Int32Ty);
+ PHINode *Q =
+ PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+ for (unsigned i = 0; i < P->getNumOperands(); ++i)
+ Q->addIncoming(Zero, P->getIncomingBlock(i));
+ return Q;
+ }
+
+ Argument *A = dyn_cast<Argument>(V);
+ Instruction *I = dyn_cast<Instruction>(V);
+ assert((A || I) && "Unknown value type");
+
+ auto InstPt =
+ A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
+ return new ZExtInst(V, Int32Ty, "", InstPt);
+ }
+
+ typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
+
+ // A PHINode is Promotable if:
+ // 1. Its type is i1 AND
+ // 2. All of its uses are ReturnInt, CallInst, PHINode, or DbgInfoIntrinsic
+ // AND
+ // 3. All of its operands are Constant or Argument or
+ // CallInst or PHINode AND
+ // 4. All of its PHINode uses are Promotable AND
+ // 5. All of its PHINode operands are Promotable
+ static PHINodeSet getPromotablePHINodes(const Function &F) {
+ PHINodeSet Promotable;
+ // Condition 1
+ for (auto &BB : F)
+ for (auto &I : BB)
+ if (const PHINode *P = dyn_cast<PHINode>(&I))
+ if (P->getType()->isIntegerTy(1))
+ Promotable.insert(P);
+
+ SmallVector<const PHINode *, 8> ToRemove;
+ for (const auto &P : Promotable) {
+ // Condition 2 and 3
+ auto IsValidUser = [] (const Value *V) -> bool {
+ return isa<ReturnInst>(V) || isa<CallInst>(V) || isa<PHINode>(V) ||
+ isa<DbgInfoIntrinsic>(V);
+ };
+ auto IsValidOperand = [] (const Value *V) -> bool {
+ return isa<Constant>(V) || isa<Argument>(V) || isa<CallInst>(V) ||
+ isa<PHINode>(V);
+ };
+ const auto &Users = P->users();
+ const auto &Operands = P->operands();
+ if (!std::all_of(Users.begin(), Users.end(), IsValidUser) ||
+ !std::all_of(Operands.begin(), Operands.end(), IsValidOperand))
+ ToRemove.push_back(P);
+ }
+
+ // Iterate to convergence
+ auto IsPromotable = [&Promotable] (const Value *V) -> bool {
+ const PHINode *Phi = dyn_cast<PHINode>(V);
+ return !Phi || Promotable.count(Phi);
+ };
+ while (!ToRemove.empty()) {
+ for (auto &User : ToRemove)
+ Promotable.erase(User);
+ ToRemove.clear();
+
+ for (const auto &P : Promotable) {
+ // Condition 4 and 5
+ const auto &Users = P->users();
+ const auto &Operands = P->operands();
+ if (!std::all_of(Users.begin(), Users.end(), IsPromotable) ||
+ !std::all_of(Operands.begin(), Operands.end(), IsPromotable))
+ ToRemove.push_back(P);
+ }
+ }
+
+ return Promotable;
+ }
+
+ typedef DenseMap<Value *, Value *> B2IMap;
+
+ public:
+ static char ID;
+ PPCBoolRetToInt() : FunctionPass(ID) {
+ initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) {
+ PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
+ B2IMap Bool2IntMap;
+ bool Changed = false;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
+ if (F.getReturnType()->isIntegerTy(1))
+ Changed |=
+ runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap);
+
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ for (auto &U : CI->operands())
+ if (U->getType()->isIntegerTy(1))
+ Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap);
+ }
+ }
+
+ return Changed;
+ }
+
+ static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+ B2IMap &BoolToIntMap) {
+ auto Defs = findAllDefs(U);
+
+ // If the values are all Constants or Arguments, don't bother
+ if (!std::any_of(Defs.begin(), Defs.end(), isa<Instruction, Value *>))
+ return false;
+
+ // Presently, we only know how to handle PHINode, Constant, and Arguments.
+ // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension
+ // could also be handled in the future.
+ for (const auto &V : Defs)
+ if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V))
+ return false;
+
+ for (const auto &V : Defs)
+ if (const PHINode *P = dyn_cast<PHINode>(V))
+ if (!PromotablePHINodes.count(P))
+ return false;
+
+ if (isa<ReturnInst>(U.getUser()))
+ ++NumBoolRetPromotion;
+ if (isa<CallInst>(U.getUser()))
+ ++NumBoolCallPromotion;
+ ++NumBoolToIntPromotion;
+
+ for (const auto &V : Defs)
+ if (!BoolToIntMap.count(V))
+ BoolToIntMap[V] = translate(V);
+
+    // Replace the operands of the translated instructions. They were set to
+    // zero in the translate function.
+ for (auto &Pair : BoolToIntMap) {
+ User *First = dyn_cast<User>(Pair.first);
+ User *Second = dyn_cast<User>(Pair.second);
+ assert((!First || Second) && "translated from user to non-user!?");
+ if (First)
+ for (unsigned i = 0; i < First->getNumOperands(); ++i)
+ Second->setOperand(i, BoolToIntMap[First->getOperand(i)]);
+ }
+
+ Value *IntRetVal = BoolToIntMap[U];
+ Type *Int1Ty = Type::getInt1Ty(U->getContext());
+ Instruction *I = cast<Instruction>(U.getUser());
+ Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I);
+ U.set(BackToBool);
+
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char PPCBoolRetToInt::ID = 0;
+INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
+ "Convert i1 constants to i32 if they are returned",
+ false, false)
+
+FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 0000000..73a5305
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,237 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes: a pass that calculates basic
+// block positions, and a pass that converts branch pseudo-ops into machine
+// branch opcodes. This pass should be run last, just before the assembly
+// printer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-branch-select"
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
+
+namespace {
+ struct PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ auto GetAlignmentAdjustment =
+ [TII](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
+ unsigned Align = MBB.getAlignment();
+ if (!Align)
+ return 0;
+
+ unsigned AlignAmt = 1 << Align;
+ unsigned ParentAlign = MBB.getParent()->getAlignment();
+
+ if (Align <= ParentAlign)
+ return OffsetToAlignment(Offset, AlignAmt);
+
+ // The alignment of this MBB is larger than the function's alignment, so we
+ // can't tell whether or not it will insert nops. Assume that it will.
+ return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+ };
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = &*MFI;
+
+ // The end of the previous block may have extra nops if this block has an
+ // alignment requirement.
+ if (MBB->getNumber() > 0) {
+ unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize);
+ BlockSizes[MBB->getNumber()-1] += AlignExtra;
+ FuncSize += AlignExtra;
+ }
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to shrink any branches in this function. This is a
+ // common case.
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineBasicBlock *Dest = nullptr;
+ if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
+ Dest = I->getOperand(2).getMBB();
+ else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
+ !I->getOperand(1).isImm())
+ Dest = I->getOperand(1).getMBB();
+ else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
+ I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) &&
+ !I->getOperand(0).isImm())
+ Dest = I->getOperand(0).getMBB();
+
+ if (!Dest) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<16>(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ if (I->getOpcode() == PPC::BCC) {
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+ } else if (I->getOpcode() == PPC::BC) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
+ } else if (I->getOpcode() == PPC::BCn) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
+ } else {
+ llvm_unreachable("Unhandled branch type!");
+ }
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+        // The expanded sequence is 8 bytes, 4 bytes larger than the original
+        // branch, so increase the size of the block by 4 and remember to
+        // iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
new file mode 100644
index 0000000..b6ac4d5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -0,0 +1,697 @@
+//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the PPC branch instructions
+// that decrement and test the count register (CTR) (bdnz and friends).
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+//
+// Criteria for CTR loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Try inner-most loops first
+// - No nested CTR loops.
+// - No function calls in loops.
+//
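+// For example, a simple counted loop such as
+//
+//   for (i = 0; i != n; ++i) a[i] += 1;
+//
+// can have its trip count moved into the CTR register (via mtctr) so that
+// the backedge becomes a single bdnz, removing the separate compare and
+// conditional branch on the induction variable.
+//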
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#ifndef NDEBUG
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#endif
+
+#include <algorithm>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ctrloops"
+
+#ifndef NDEBUG
+static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
+#endif
+
+STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+ void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
+}
+
+namespace {
+ struct PPCCTRLoops : public FunctionPass {
+
+#ifndef NDEBUG
+ static int Counter;
+#endif
+
+ public:
+ static char ID;
+
+ PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
+ PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ private:
+ bool mightUseCTR(const Triple &TT, BasicBlock *BB);
+ bool convertToCTRLoop(Loop *L);
+
+ private:
+ PPCTargetMachine *TM;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ const TargetLibraryInfo *LibInfo;
+ bool PreserveLCSSA;
+ };
+
+ char PPCCTRLoops::ID = 0;
+#ifndef NDEBUG
+ int PPCCTRLoops::Counter = 0;
+#endif
+
+#ifndef NDEBUG
+ struct PPCCTRLoopsVerify : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ MachineDominatorTree *MDT;
+ };
+
+ char PPCCTRLoopsVerify::ID = 0;
+#endif // NDEBUG
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+
+FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
+ return new PPCCTRLoops(TM);
+}
+
+#ifndef NDEBUG
+INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
+
+FunctionPass *llvm::createPPCCTRLoopsVerify() {
+ return new PPCCTRLoopsVerify();
+}
+#endif // NDEBUG
+
+bool PPCCTRLoops::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DL = &F.getParent()->getDataLayout();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ bool MadeChange = false;
+
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end();
+ I != E; ++I) {
+ Loop *L = *I;
+ if (!L->getParentLoop())
+ MadeChange |= convertToCTRLoop(L);
+ }
+
+ return MadeChange;
+}
+
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+ return false;
+}
+
+// Determining the address of a TLS variable results in a function call in
+// certain TLS models.
+static bool memAddrUsesCTR(const PPCTargetMachine *TM,
+ const Value *MemAddr) {
+ const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+ if (!GV) {
+ // Recurse to check for constants that refer to TLS global variables.
+ if (const auto *CV = dyn_cast<Constant>(MemAddr))
+ for (const auto &CO : CV->operands())
+ if (memAddrUsesCTR(TM, CO))
+ return true;
+
+ return false;
+ }
+
+ if (!GV->isThreadLocal())
+ return false;
+ if (!TM)
+ return true;
+ TLSModel::Model Model = TM->getTLSModel(GV);
+ return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
+}
+
+bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+ J != JE; ++J) {
+ if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
+ }
+
+ continue;
+ }
+
+ if (!TM)
+ return true;
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
+
+ if (Function *F = CI->getCalledFunction()) {
+ // Most intrinsics don't become function calls, but some might.
+ // sin, cos, exp and log are always calls.
+ unsigned Opcode;
+ if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (F->getIntrinsicID()) {
+ default: continue;
+ // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
+ // we're definitely using CTR.
+ case Intrinsic::ppc_is_decremented_ctr_nonzero:
+ case Intrinsic::ppc_mtctr:
+ return true;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::longjmp:
+
+ // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+ // because, although it does clobber the counter register, the
+ // control can't then return to inside the loop unless there is also
+ // an eh_sjlj_setjmp.
+ case Intrinsic::eh_sjlj_setjmp:
+
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::powi:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ }
+ }
+
+ // PowerPC does not use [US]DIVREM or other library calls for
+ // operations on regular types which are not otherwise library calls
+ // (i.e. soft float or atomics). If adapting for targets that do,
+ // additional care is required here.
+
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ // Non-read-only functions are never treated as intrinsics.
+ if (!CI->onlyReadsMemory())
+ return true;
+
+ // Conversion happens only for FP calls.
+ if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+ return true;
+
+ switch (Func) {
+ default: return true;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::copysignl:
+ return true;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ continue; // ISD::FABS is never a library call.
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ Opcode = ISD::FSQRT; break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ Opcode = ISD::FFLOOR; break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ Opcode = ISD::FNEARBYINT; break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ Opcode = ISD::FCEIL; break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ Opcode = ISD::FRINT; break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ Opcode = ISD::FROUND; break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ Opcode = ISD::FTRUNC; break;
+ }
+
+ auto &DL = CI->getModule()->getDataLayout();
+ MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(),
+ true);
+ if (VTy == MVT::Other)
+ return true;
+
+ if (TLI->isOperationLegalOrCustom(Opcode, VTy))
+ continue;
+ else if (VTy.isVector() &&
+ TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
+ continue;
+
+ return true;
+ }
+ }
+
+ return true;
+ } else if (isa<BinaryOperator>(J) &&
+ J->getType()->getScalarType()->isPPC_FP128Ty()) {
+ // Most operations on ppc_f128 values become calls.
+ return true;
+ } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+ isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+ CastInst *CI = cast<CastInst>(J);
+ if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+ CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
+ return true;
+ } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::UDiv ||
+ J->getOpcode() == Instruction::SDiv ||
+ J->getOpcode() == Instruction::URem ||
+ J->getOpcode() == Instruction::SRem)) {
+ return true;
+ } else if (TT.isArch32Bit() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
+ } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+ // On PowerPC, indirect jumps use the counter register.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+ if (!TM)
+ return true;
+ const TargetLowering *TLI =
+ TM->getSubtargetImpl(*BB->getParent())->getTargetLowering();
+
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
+ return true;
+ }
+ for (Value *Operand : J->operands())
+ if (memAddrUsesCTR(TM, Operand))
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
+ bool MadeChange = false;
+
+ const Triple TT =
+ Triple(L->getHeader()->getParent()->getParent()->getTargetTriple());
+ if (!TT.isArch32Bit() && !TT.isArch64Bit())
+ return MadeChange; // Unknown arch. type.
+
+ // Process nested loops first.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ MadeChange |= convertToCTRLoop(*I);
+ DEBUG(dbgs() << "Nested loop converted\n");
+ }
+
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (MadeChange)
+ return MadeChange;
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = CTRLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= CTRLoopLimit)
+ return false;
+ Counter++;
+ }
+#endif
+
+ // We don't want to spill/restore the counter register, and so we don't
+ // want to use the counter register if the loop contains calls.
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I)
+ if (mightUseCTR(TT, *I))
+ return MadeChange;
+
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ BasicBlock *CountedExitBlock = nullptr;
+ const SCEV *ExitCount = nullptr;
+ BranchInst *CountedExitBranch = nullptr;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ IE = ExitingBlocks.end(); I != IE; ++I) {
+ const SCEV *EC = SE->getExitCount(L, *I);
+ DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
+ (*I)->getName() << ": " << *EC << "\n");
+ if (isa<SCEVCouldNotCompute>(EC))
+ continue;
+ if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+ if (ConstEC->getValue()->isZero())
+ continue;
+ } else if (!SE->isLoopInvariant(EC, L))
+ continue;
+
+ if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ continue;
+
+ // We now have a loop-invariant count of loop iterations (which is not the
+ // constant zero) for which we know that this loop will not exit via this
+ // exiting block.
+
+ // We need to make sure that this block will run on every loop iteration.
+ // For this to be true, we must dominate all blocks with backedges. Such
+ // blocks are in-loop predecessors to the header block.
+ bool NotAlways = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
+ if (!L->contains(*PI))
+ continue;
+
+ if (!DT->dominates(*I, *PI)) {
+ NotAlways = true;
+ break;
+ }
+ }
+
+ if (NotAlways)
+ continue;
+
+ // Make sure this block ends with a conditional branch.
+ Instruction *TI = (*I)->getTerminator();
+ if (!TI)
+ continue;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional())
+ continue;
+
+ CountedExitBranch = BI;
+ } else
+ continue;
+
+ // Note that this block may not be the loop latch block, even if the loop
+ // has a latch block.
+ CountedExitBlock = *I;
+ ExitCount = EC;
+ break;
+ }
+
+ if (!CountedExitBlock)
+ return MadeChange;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+
+ // If we don't have a preheader, then insert one. If we already have a
+ // preheader, then we can use it (except if the preheader contains a use of
+ // the CTR register because some such uses might be reordered by the
+ // selection DAG after the mtctr instruction).
+ if (!Preheader || mightUseCTR(TT, Preheader))
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ if (!Preheader)
+ return MadeChange;
+
+ DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
+
+ // Insert the count into the preheader and replace the condition used by the
+ // selected branch.
+ MadeChange = true;
+
+ SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
+ LLVMContext &C = SE->getContext();
+ Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
+ Type::getInt32Ty(C);
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
+ ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
+ Value *ECValue =
+ SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
+
+ IRBuilder<> CountBuilder(Preheader->getTerminator());
+ Module *M = Preheader->getParent()->getParent();
+ Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
+ CountType);
+ CountBuilder.CreateCall(MTCTRFunc, ECValue);
+
+ IRBuilder<> CondBuilder(CountedExitBranch);
+ Value *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
+ Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
+ Value *OldCond = CountedExitBranch->getCondition();
+ CountedExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(CountedExitBranch->getSuccessor(0)))
+ CountedExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ DeleteDeadPHIs(CountedExitBlock);
+
+ ++NumCTRLoops;
+ return MadeChange;
+}
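+
+// Roughly, a successful conversion rewrites a counted loop from
+//
+//   preheader:
+//     br label %loop
+//   loop:
+//     ...
+//     %cond = icmp ne i64 %iv.next, %n
+//     br i1 %cond, label %loop, label %exit
+//
+// into (names approximate; %count is the computed trip count)
+//
+//   preheader:
+//     call void @llvm.ppc.mtctr.i64(i64 %count)
+//     br label %loop
+//   loop:
+//     ...
+//     %cond = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
+//     br i1 %cond, label %loop, label %exit
+//
+// which instruction selection later turns into an mtctr/bdnz pair.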
+
+#ifndef NDEBUG
+static bool clobbersCTR(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
+ return true;
+ } else if (MO.isRegMask()) {
+ if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool verifyCTRBranch(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator BI = I;
+ SmallSet<MachineBasicBlock *, 16> Visited;
+ SmallVector<MachineBasicBlock *, 8> Preds;
+ bool CheckPreds;
+
+ if (I == MBB->begin()) {
+ Visited.insert(MBB);
+ goto queue_preds;
+ } else
+ --I;
+
+check_block:
+ Visited.insert(MBB);
+ if (I == MBB->end())
+ goto queue_preds;
+
+ CheckPreds = true;
+ for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
+ unsigned Opc = I->getOpcode();
+ if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
+ CheckPreds = false;
+ break;
+ }
+
+ if (I != BI && clobbersCTR(I)) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
+ MBB->getFullName() << ") instruction " << *I <<
+ " clobbers CTR, invalidating " << "BB#" <<
+ BI->getParent()->getNumber() << " (" <<
+ BI->getParent()->getFullName() << ") instruction " <<
+ *BI << "\n");
+ return false;
+ }
+
+ if (I == IE)
+ break;
+ }
+
+ if (!CheckPreds && Preds.empty())
+ return true;
+
+ if (CheckPreds) {
+queue_preds:
+ if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
+ DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
+ BI->getParent()->getNumber() << " (" <<
+ BI->getParent()->getFullName() << ") instruction " <<
+ *BI << "\n");
+ return false;
+ }
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PIE = MBB->pred_end(); PI != PIE; ++PI)
+ Preds.push_back(*PI);
+ }
+
+ do {
+ MBB = Preds.pop_back_val();
+ if (!Visited.count(MBB)) {
+ I = MBB->getLastNonDebugInstr();
+ goto check_block;
+ }
+ } while (!Preds.empty());
+
+ return true;
+}
+
+bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
+ MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
+ // any other instructions that might clobber the ctr register.
+ for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
+ I != IE; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ if (!MDT->isReachableFromEntry(MBB))
+ continue;
+
+ for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
+ MIIE = MBB->end(); MII != MIIE; ++MII) {
+ unsigned Opc = MII->getOpcode();
+ if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
+ Opc == PPC::BDZ8 || Opc == PPC::BDZ)
+ if (!verifyCTRBranch(MBB, MII))
+ llvm_unreachable("Invalid PPC CTR loop!");
+ }
+ }
+
+ return false;
+}
+#endif // NDEBUG
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
new file mode 100644
index 0000000..eb904a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.h
@@ -0,0 +1,35 @@
+//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the PPC Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H
+#define LLVM_LIB_TARGET_PPC_PPCCALLINGCONV_H
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ llvm_unreachable("The AnyReg calling convention is only supported by the " \
+ "stackmap and patchpoint intrinsics.");
+ // Gracefully fall back to the PPC C calling convention on Release builds.
+ return false;
+}
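+
+// The generated calling-convention code refers to this routine through
+// CCCustom<"CC_PPC_AnyReg_Error"> entries in PPCCallingConv.td.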
+
+} // End llvm namespace
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 0000000..5bc9124
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,265 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
+class CCIfNotSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("!static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// PPC64 AnyReg return-value convention. No explicit register is specified for
+// the return-value. The register allocator is allowed and expected to choose
+// any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fall back to the PPC C calling convention.
+def RetCC_PPC64_AnyReg : CallingConv<[
+ CCCustom<"CC_PPC_AnyReg_Error">
+]>;
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
+
+ // On PPC64, integer return values are always promoted to i64
+ CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+ CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,
+
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ // Floating point types returned as "direct" go into F1 .. F8; note that
+ // only the ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // QPX vectors are returned in QF1 and QF2.
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+
+ // Vector types returned as "direct" go into V2 .. V9; note that only the
+ // ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
+]>;
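+
+// For example, under this convention an i32 returned on a 64-bit subtarget is
+// promoted to i64 and assigned to X3, while an f64 return value lands in F1;
+// the remaining F2-F8 (and V3-V9, etc.) slots are only fully exercised by the
+// ELFv2 ABI.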
+
+// No explicit register is specified for the AnyReg calling convention. The
+// register allocator may assign the arguments to any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fall back to the PPC C calling convention.
+def CC_PPC64_AnyReg : CallingConv<[
+ CCCustom<"CC_PPC_AnyReg_Error">
+]>;
+
+// Note that we don't currently have calling conventions for 64-bit
+// PowerPC, but handle all the complexities of the ABI in the lowering
+// logic. FIXME: See if the logic can be simplified with use of CCs.
+// This may require some extensions to current table generation.
+
+// Simple calling convention for 64-bit ELF PowerPC fast isel.
+// Only handle ints and floats. All ints are promoted to i64.
+// Vector types and quadword ints are not handled.
+def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
+
+ CCIfType<[i1], CCPromoteToType<i64>>,
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>
+]>;
+
+// Simple return-value convention for 64-bit ELF PowerPC fast isel.
+// All small ints are promoted to i64. Vector types, quadword ints,
+// and multiple register returns are "supported" to avoid compile
+// errors, but none are handled by the fast selector.
+def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
+
+ CCIfType<[i1], CCPromoteToType<i64>>,
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 32-bit ABI
+//===----------------------------------------------------------------------===//
+
+def CC_PPC32_SVR4_Common : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i32>>,
+
+ // The ABI requires i64 to be passed in two adjacent registers with the first
+ // register having an odd register number.
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
+
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+
+ // Make sure the i64 words from a long double are either both passed in
+ // registers or both passed on the stack.
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
+
+ // FP values are passed in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Split arguments have an alignment of 8 bytes on the stack.
+ CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // Floats are stored in double precision format, thus they have the same
+ // alignment and size as doubles.
+ CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+
+ // QPX vectors that are stored in double precision need 32-byte alignment.
+ CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
+]>;
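+
+// As an illustration of the pair-alignment rule above: for a call such as
+// f(int a, long long b), 'a' is assigned R3 and the two halves of 'b' are
+// assigned R5/R6, with R4 skipped so that the pair starts in an odd-numbered
+// register.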
+
+// This calling convention always puts vector arguments on the stack. It is
+// used to assign vector arguments which belong to the variable portion of the
+// parameter list of a variable argument function.
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
+ // QPX vectors mirror the scalar FP convention.
+ CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
+ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
+
+ // The first 12 Vector arguments are passed in AltiVec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
+ V8, V9, V10, V11, V12, V13]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
+ VSH10, VSH11, VSH12, VSH13]>>>,
+
+ CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the caller's stack frame.
+//
+// Still, the address of the aggregate copy in the caller's stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC32_SVR4 calling convention.
+//
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+
+def CC_PPC32_SVR4_ByVal : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
+]>;
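+
+// For example, a 12-byte by-value struct argument simply reserves 12 bytes
+// (three word-aligned slots) of stack space here; the pointer to that copy is
+// assigned separately by CC_PPC32_SVR4.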
+
+def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>;
+
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_Darwin64_Altivec : CalleeSavedRegs<(add CSR_Darwin64, CSR_Altivec)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
+
+def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
+
+def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
+ (sequence "X%u", 14, 31),
+ (sequence "F%u", 0, 31),
+ (sequence "CR%u", 0, 7))>;
+
+def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs,
+ (sequence "V%u", 0, 31))>;
+
+def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
+ (sequence "VSL%u", 0, 31),
+ (sequence "VSH%u", 0, 31))>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
new file mode 100644
index 0000000..7cb1bb5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -0,0 +1,207 @@
+//===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that forms early (predicated) returns. If-conversion handles some of
+// this, but this pass picks up some remaining cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-early-ret"
+STATISTIC(NumBCLR, "Number of early conditional returns");
+STATISTIC(NumBLR, "Number of early returns");
+
+namespace llvm {
+ void initializePPCEarlyReturnPass(PassRegistry&);
+}
+
+namespace {
+ // PPCEarlyReturn pass - For simple functions without epilogue code, move
+ // returns up, and create conditional returns, to avoid unnecessary
+ // branch-to-blr sequences.
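+ //
+ // Schematically, a predecessor that ends with a conditional branch to a
+ // block containing nothing but "blr", e.g.
+ //     bc 12, 4*cr0+eq, <return block>
+ // is rewritten to end with a conditional return instead:
+ //     bclr 12, 4*cr0+eq
+ // and the blr-only block can often be removed entirely.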
+ struct PPCEarlyReturn : public MachineFunctionPass {
+ static char ID;
+ PPCEarlyReturn() : MachineFunctionPass(ID) {
+ initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
+ }
+
+ const TargetInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &ReturnMBB) {
+ bool Changed = false;
+
+ MachineBasicBlock::iterator I = ReturnMBB.begin();
+ I = ReturnMBB.SkipPHIsAndLabels(I);
+
+ // The block must be essentially empty except for the blr.
+ if (I == ReturnMBB.end() ||
+ (I->getOpcode() != PPC::BLR && I->getOpcode() != PPC::BLR8) ||
+ I != ReturnMBB.getLastNonDebugInstr())
+ return Changed;
+
+ SmallVector<MachineBasicBlock*, 8> PredToRemove;
+ for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
+ PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+ bool OtherReference = false, BlockChanged = false;
+
+ if ((*PI)->empty())
+ continue;
+
+ for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
+ if (J == (*PI)->end())
+ break;
+
+ if (J->getOpcode() == PPC::B) {
+ if (J->getOperand(0).getMBB() == &ReturnMBB) {
+ // This is an unconditional branch to the return. Replace the
+ // branch with a blr.
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()))
+ .copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BCC) {
+ if (J->getOperand(2).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ .addImm(J->getOperand(0).getImm())
+ .addReg(J->getOperand(1).getReg())
+ .copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
+ if (J->getOperand(1).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ BuildMI(
+ **PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg())
+ .copyImplicitOps(I);
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->isBranch()) {
+ if (J->isIndirectBranch()) {
+ if (ReturnMBB.hasAddressTaken())
+ OtherReference = true;
+ } else
+ for (unsigned i = 0; i < J->getNumOperands(); ++i)
+ if (J->getOperand(i).isMBB() &&
+ J->getOperand(i).getMBB() == &ReturnMBB)
+ OtherReference = true;
+ } else if (!J->isTerminator() && !J->isDebugValue())
+ break;
+
+ if (J == (*PI)->begin())
+ break;
+
+ --J;
+ }
+
+ if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+ OtherReference = true;
+
+ // Predecessors are stored in a vector and can't be removed here.
+ if (!OtherReference && BlockChanged) {
+ PredToRemove.push_back(*PI);
+ }
+
+ if (BlockChanged)
+ Changed = true;
+ }
+
+ for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
+ PredToRemove[i]->removeSuccessor(&ReturnMBB, true);
+
+ if (Changed && !ReturnMBB.hasAddressTaken()) {
+ // We now might be able to merge this blr-only block into its
+ // by-layout predecessor.
+ if (ReturnMBB.pred_size() == 1) {
+ MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
+ if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
+ // Move the blr into the preceding block.
+ PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
+ PrevMBB.removeSuccessor(&ReturnMBB, true);
+ }
+ }
+
+ if (ReturnMBB.pred_empty())
+ ReturnMBB.eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Changed = false;
+
+ // If the function does not have at least two blocks, then there is
+ // nothing to do.
+ if (MF.size() < 2)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
+ "PowerPC Early-Return Creation", false, false)
+
+char PPCEarlyReturn::ID = 0;
+FunctionPass*
+llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
new file mode 100644
index 0000000..b451ebf
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -0,0 +1,2344 @@
+//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// PPCGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+//===----------------------------------------------------------------------===//
+//
+// TBD:
+// fastLowerArguments: Handle simple cases.
+// PPCMaterializeGV: Handle TLS.
+// SelectCall: Handle function pointers.
+// SelectCall: Handle multi-register return values.
+// SelectCall: Optimize away nops for local calls.
+// processCallArgs: Handle bit-converted arguments.
+// finishCall: Handle multi-register return values.
+// PPCComputeAddress: Handle parameter references as FrameIndex's.
+// PPCEmitCmp: Handle immediate as operand 1.
+// SelectCall: Handle small byval arguments.
+// SelectIntrinsicCall: Implement.
+// SelectSelect: Implement.
+// Consider factoring isTypeLegal into the base class.
+// Implement switches and jump tables.
+//
+//===----------------------------------------------------------------------===//
+using namespace llvm;
+
+#define DEBUG_TYPE "ppcfastisel"
+
+namespace {
+
+typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ long Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+} Address;
+
+class PPCFastISel final : public FastISel {
+
+ const TargetMachine &TM;
+ const PPCSubtarget *PPCSubTarget;
+ PPCFunctionInfo *PPCFuncInfo;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ LLVMContext *Context;
+
+ public:
+ explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo)
+ : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
+ PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
+ PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
+ TII(*PPCSubTarget->getInstrInfo()),
+ TLI(*PPCSubTarget->getTargetLowering()),
+ Context(&FuncInfo.Fn->getContext()) {}
+
+ // Backend specific FastISel code.
+ private:
+ bool fastSelectInstruction(const Instruction *I) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
+ bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) override;
+ bool fastLowerArguments() override;
+ unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+ unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectIToFP(const Instruction *I, bool IsSigned);
+ bool SelectFPToI(const Instruction *I, bool IsSigned);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool isValueAvailable(const Value *V) const;
+ bool isVSFRCRegister(unsigned Register) const {
+ return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
+ }
+ bool isVSSRCRegister(unsigned Register) const {
+ return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
+ }
+ bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt, unsigned DestReg);
+ bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC, bool IsZExt = true,
+ unsigned FP64LoadOpc = PPC::LFD);
+ bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
+ bool PPCComputeAddress(const Value *Obj, Address &Addr);
+ void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg);
+ bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt);
+ unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
+ bool UseSExt = true);
+ unsigned PPCMaterialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC);
+ unsigned PPCMaterialize64BitInt(int64_t Imm,
+ const TargetRegisterClass *RC);
+ unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned);
+ unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
+
+ // Call handling routines.
+ private:
+ bool processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg);
+ bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
+ CCAssignFn *usePPC32CCs(unsigned Flag);
+
+ private:
+ #include "PPCGenFastISel.inc"
+
+};
+
+} // end anonymous namespace
+
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
+ if (Flag == 1)
+ return CC_PPC32_SVR4;
+ else if (Flag == 2)
+ return CC_PPC32_SVR4_ByVal;
+ else if (Flag == 3)
+ return CC_PPC32_SVR4_VarArg;
+ else
+ return RetCC_PPC;
+}
+
+static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // These are not representable with any single compare.
+ case CmpInst::FCMP_FALSE:
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ default:
+ return Optional<PPC::Predicate>();
+
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::ICMP_EQ:
+ return PPC::PRED_EQ;
+
+ case CmpInst::FCMP_OGT:
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ return PPC::PRED_GT;
+
+ case CmpInst::FCMP_OGE:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ return PPC::PRED_GE;
+
+ case CmpInst::FCMP_OLT:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_SLT:
+ return PPC::PRED_LT;
+
+ case CmpInst::FCMP_OLE:
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ return PPC::PRED_LE;
+
+ case CmpInst::FCMP_ONE:
+ case CmpInst::ICMP_NE:
+ return PPC::PRED_NE;
+
+ case CmpInst::FCMP_ORD:
+ return PPC::PRED_NU;
+
+ case CmpInst::FCMP_UNO:
+ return PPC::PRED_UN;
+ }
+}
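+
+// Note that signed and unsigned integer predicates map to the same PPC
+// predicate code here (e.g. ICMP_SGT and ICMP_UGT both become PRED_GT); the
+// signedness is instead encoded in the compare opcode chosen by PPCEmitCmp
+// (cmpw/cmpd vs. cmplw/cmpld).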
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel, and return its equivalent machine type in VT.
+// FIXME: Copied directly from ARM -- factor into base class?
+bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT Evt = TLI.getValueType(DL, Ty, true);
+
+ // Only handle simple types.
+ if (Evt == MVT::Other || !Evt.isSimple()) return false;
+ VT = Evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel as a load target, and return its equivalent machine type in VT.
+bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type that can be sign- or zero-extended to a basic operation,
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCFastISel::isValueAvailable(const Value *V) const {
+ if (!isa<Instruction>(V))
+ return true;
+
+ const auto *I = cast<Instruction>(V);
+ return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
+}
+
+// Given a value Obj, create an Address object Addr that represents its
+// address. Return false if we can't handle it.
+bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ // Look through bitcasts.
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ long TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
+ II != IE; ++II, ++GTI) {
+ const Value *Op = *II;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // FIXME: References to parameters fall through to the behavior
+ // below. They should be able to reference a frame index since
+ // they are stored to the stack, so we can get "ld rx, offset(r1)"
+ // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
+ // just contain the parameter. Try to handle this with a FI.
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0)
+ Addr.Base.Reg = getRegForValue(Obj);
+
+ // Prevent assignment of base register to X0, which is inappropriate
+ // for loads and stores alike.
+ if (Addr.Base.Reg != 0)
+ MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ return Addr.Base.Reg != 0;
+}
+
+// Fix up some addresses that can't be used directly. For example, if
+// an offset won't fit in an instruction field, we may need to move it
+// into an index register.
+void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg) {
+
+ // Check whether the offset fits in the instruction field.
+ if (!isInt<16>(Addr.Offset))
+ UseOffset = false;
+
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ if (!UseOffset) {
+ IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
+ : Type::getInt64Ty(*Context));
+ const ConstantInt *Offset =
+ ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
+ IndexReg = PPCMaterializeInt(Offset, MVT::i64);
+ assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
+ }
+}
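+
+// For example, an access at offset 70000 from a base register does not fit
+// the 16-bit signed displacement of a D-form load/store, so the code above
+// clears UseOffset and materializes the offset into IndexReg; callers then
+// select the indexed (X-form) variant, e.g. ldx/stdx instead of ld/std.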
+
+// Emit a load instruction if possible, returning true if we succeeded,
+// otherwise false. See commentary below for how the register class of
+// the load is determined.
+bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC,
+ bool IsZExt, unsigned FP64LoadOpc) {
+ unsigned Opc;
+ bool UseOffset = true;
+
+ // If ResultReg is given, it determines the register class of the load.
+ // Otherwise, RC is the register class to use. If the result of the
+ // load isn't anticipated in this block, both may be zero, in which
+ // case we must make a conservative guess. In particular, don't assign
+ // R0 or X0 to the result register, as the result may be used in a load,
+ // store, add-immediate, or isel that won't permit this. (Though
+ // perhaps the spill and reload of live-exit values would handle this?)
+ const TargetRegisterClass *UseRC =
+ (ResultReg ? MRI.getRegClass(ResultReg) :
+ (RC ? RC :
+ (VT == MVT::f64 ? &PPC::F8RCRegClass :
+ (VT == MVT::f32 ? &PPC::F4RCRegClass :
+ (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass)))));
+
+ bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
+ break;
+ case MVT::i16:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
+ (Is32BitInt ? PPC::LHA : PPC::LHA8));
+ break;
+ case MVT::i32:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
+ (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
+ if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
+ UseOffset = false;
+ break;
+ case MVT::i64:
+ Opc = PPC::LD;
+ assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
+ "64-bit load with 32-bit target??");
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::LFS;
+ break;
+ case MVT::f64:
+ Opc = FP64LoadOpc;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+ // If this is a potential VSX load with an offset of 0, a VSX indexed load can
+ // be used.
+ bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
+ bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
+ bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
+ bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
+ if ((Is32VSXLoad || Is64VSXLoad) &&
+ (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
+ (Addr.Offset == 0)) {
+ UseOffset = false;
+ }
+
+ if (ResultReg == 0)
+ ResultReg = createResultReg(UseRC);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ // VSX only provides an indexed load.
+ if (Is32VSXLoad || Is64VSXLoad) return false;
+
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
+ Addr.Offset),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset) {
+ // VSX only provides an indexed load.
+ if (Is32VSXLoad || Is64VSXLoad) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ } else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::LBZ: Opc = PPC::LBZX; break;
+ case PPC::LBZ8: Opc = PPC::LBZX8; break;
+ case PPC::LHZ: Opc = PPC::LHZX; break;
+ case PPC::LHZ8: Opc = PPC::LHZX8; break;
+ case PPC::LHA: Opc = PPC::LHAX; break;
+ case PPC::LHA8: Opc = PPC::LHAX8; break;
+ case PPC::LWZ: Opc = PPC::LWZX; break;
+ case PPC::LWZ8: Opc = PPC::LWZX8; break;
+ case PPC::LWA: Opc = PPC::LWAX; break;
+ case PPC::LWA_32: Opc = PPC::LWAX_32; break;
+ case PPC::LD: Opc = PPC::LDX; break;
+ case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
+ case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a load instruction.
+bool PPCFastISel::SelectLoad(const Instruction *I) {
+ // FIXME: No atomic loads are supported.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(0), Addr))
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. This is necessary
+ // to constrain RA from using R0/X0 when this is not legal.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Emit a store instruction to store SrcReg at Addr.
+bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
+ assert(SrcReg && "Nothing to store!");
+ unsigned Opc;
+ bool UseOffset = true;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
+ bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::STB : PPC::STB8;
+ break;
+ case MVT::i16:
+ Opc = Is32BitInt ? PPC::STH : PPC::STH8;
+ break;
+ case MVT::i32:
+ assert(Is32BitInt && "Not GPRC for i32??");
+ Opc = PPC::STW;
+ break;
+ case MVT::i64:
+ Opc = PPC::STD;
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::STFS;
+ break;
+ case MVT::f64:
+ Opc = PPC::STFD;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+ // If this is a potential VSX store with an offset of 0, a VSX indexed store
+ // can be used.
+ bool IsVSSRC = isVSSRCRegister(SrcReg);
+ bool IsVSFRC = isVSFRCRegister(SrcReg);
+ bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
+ bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
+ if ((Is32VSXStore || Is64VSXStore) &&
+ (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
+ (Addr.Offset == 0)) {
+ UseOffset = false;
+ }
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ // VSX only provides an indexed store.
+ if (Is32VSXStore || Is64VSXStore) return false;
+
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
+ Addr.Offset),
+ MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg)
+ .addImm(Addr.Offset)
+ .addFrameIndex(Addr.Base.FI)
+ .addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset) {
+ // VSX only provides an indexed store.
+ if (Is32VSXStore || Is64VSXStore) return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ } else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::STB: Opc = PPC::STBX; break;
+ case PPC::STH : Opc = PPC::STHX; break;
+ case PPC::STW : Opc = PPC::STWX; break;
+ case PPC::STB8: Opc = PPC::STBX8; break;
+ case PPC::STH8: Opc = PPC::STHX8; break;
+ case PPC::STW8: Opc = PPC::STWX8; break;
+ case PPC::STD: Opc = PPC::STDX; break;
+ case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
+ case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
+ }
+
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg);
+
+ // If we have an index register defined, we use it in the store instruction;
+ // otherwise we use X0 as the base, which makes the vector instructions use
+ // zero in the computation of the effective address regardless of the
+ // content of the register.
+ if (IndexReg)
+ MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
+ else
+ MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a store instruction.
+bool PPCFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // FIXME: No atomic stores are supported.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(Op0->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!PPCEmitStore(VT, SrcReg, Addr))
+ return false;
+
+ return true;
+}
+
+// Attempt to fast-select a branch instruction.
+bool PPCFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *BrBB = FuncInfo.MBB;
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // For now, just try the simplest case where it's fed by a compare.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (isValueAvailable(CI)) {
+ Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+ if (!OptPPCPred)
+ return false;
+
+ PPC::Predicate PPCPred = OptPPCPred.getValue();
+
+ // Take advantage of fall-through opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ PPCPred = PPC::InvertPredicate(PPCPred);
+ }
+
+ unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+
+ if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CondReg))
+ return false;
+
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
+ .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ fastEmitBranch(Target, DbgLoc);
+ return true;
+ }
+
+ // FIXME: ARM looks for a case where the block containing the compare
+ // has been split from the block containing the branch. If this happens,
+ // there is a vreg available containing the result of the compare. I'm
+ // not sure we can do much, as we've lost the predicate information with
+ // the compare instruction -- we have a 4-bit CR but don't know which bit
+ // to test here.
+ return false;
+}
+
+// Attempt to emit a compare of the two source values. Signed and unsigned
+// comparisons are supported. Return false if we can't handle it.
+bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
+ bool IsZExt, unsigned DestReg) {
+ Type *Ty = SrcValue1->getType();
+ EVT SrcEVT = TLI.getValueType(DL, Ty, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
+ return false;
+
+ // See if operand 2 is an immediate encodeable in the compare.
+ // FIXME: Operands are not in canonical order at -O0, so an immediate
+ // operand in position 1 is a lost opportunity for now. We are
+ // similar to ARM in this regard.
+ long Imm = 0;
+ bool UseImm = false;
+
+ // Only 16-bit integer constants can be represented in compares for
+ // PowerPC. Others will be materialized into a register.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
+ if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
+ if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
+ UseImm = true;
+ }
+ }
+
+ unsigned CmpOpc;
+ bool NeedsExt = false;
+ switch (SrcVT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ CmpOpc = PPC::FCMPUS;
+ break;
+ case MVT::f64:
+ CmpOpc = PPC::FCMPUD;
+ break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ NeedsExt = true;
+ // Intentional fall-through.
+ case MVT::i32:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
+ break;
+ case MVT::i64:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
+ if (NeedsExt) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg1 = ExtReg;
+
+ if (!UseImm) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg2 = ExtReg;
+ }
+ }
+
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addImm(Imm);
+
+ return true;
+}
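+
+// For example, an i8 equality compare against the constant 7 is emitted here
+// (schematically) as an extend of the source to i32 followed by
+//     cmpwi crN, rTmp, 7        ; immediate fits in 16 bits, so UseImm is set
+// and the resulting CR field is then consumed by SelectBranch.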
+
+// Attempt to fast-select a floating-point extend instruction.
+bool PPCFastISel::SelectFPExt(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
+
+ if (SrcVT != MVT::f32 || DestVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // No code is generated for a FP extend.
+ updateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point truncate instruction.
+bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
+
+ if (SrcVT != MVT::f64 || DestVT != MVT::f32)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // Round the result to single precision.
+ unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
+ .addReg(SrcReg);
+
+ updateValueMap(I, DestReg);
+ return true;
+}
+
+// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
+// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
+// case to 8 bytes which produces tighter code but wastes stack space.
+unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
+ bool IsSigned) {
+
+ // If necessary, extend 32-bit int to 64-bit.
+ if (SrcVT == MVT::i32) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return 0;
+ SrcReg = TmpReg;
+ }
+
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the GPR.
+ if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
+ return 0;
+
+ // Load the integer value into an FPR. The kind of load used depends
+ // on a number of conditions.
+ unsigned LoadOpc = PPC::LFD;
+
+ if (SrcVT == MVT::i32) {
+ if (!IsSigned) {
+ LoadOpc = PPC::LFIWZX;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
+ } else if (PPCSubTarget->hasLFIWAX()) {
+ LoadOpc = PPC::LFIWAX;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
+ }
+ }
+
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select an integer-to-floating-point conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
+bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
+ MVT DstVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::f32 && DstVT != MVT::f64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
+ SrcVT != MVT::i32 && SrcVT != MVT::i64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // We can only lower an unsigned convert if we have the newer
+ // floating-point conversion operations.
+ if (!IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
+ // FIXME: For now we require the newer floating-point conversion operations
+ // (which are present only on P7 and A2 server models) when converting
+ // to single-precision float. Otherwise we have to generate a lot of
+ // fiddly code to avoid double rounding. If necessary, the fiddly code
+ // can be found in PPCTargetLowering::LowerINT_TO_FP().
+ if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
+ return false;
+
+ // Extend the input if necessary.
+ if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return false;
+ SrcVT = MVT::i64;
+ SrcReg = TmpReg;
+ }
+
+ // Move the integer value to an FPR.
+ unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
+ if (FPReg == 0)
+ return false;
+
+ // Determine the opcode for the conversion.
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned Opc;
+
+ if (DstVT == MVT::f32)
+ Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
+ else
+ Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(FPReg);
+
+ updateValueMap(I, DestReg);
+ return true;
+}
+
+// Move the floating-point value in SrcReg into an integer destination
+// register, and return the register (or zero if we can't handle it).
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned) {
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+  // Note that if we have STFIWX available, we could use a 4-byte stack
+ // slot for i32, but this being fast-isel we'll just go with the
+ // easiest code gen possible.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the FPR.
+ if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
+ return 0;
+
+ // Reload it into a GPR. If we want an i32, modify the address
+ // to have a 4-byte offset so we load from the right place.
+ if (VT == MVT::i32)
+ Addr.Offset = 4;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+ MVT DstVT, SrcVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::i32 && DstVT != MVT::i64)
+ return false;
+
+ // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+ if (!isTypeLegal(SrcTy, SrcVT))
+ return false;
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // Convert f32 to f64 if necessary. This is just a meaningless copy
+ // to get the register class right. COPY_TO_REGCLASS is needed since
+  // a COPY from F4RC to F8RC is converted to an F4RC-F4RC copy downstream.
+ const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+ if (InRC == &PPC::F4RCRegClass) {
+ unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+ .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+ SrcReg = TmpReg;
+ }
+
+ // Determine the opcode for the conversion, which takes place
+ // entirely within FPRs.
+ unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+ unsigned Opc;
+
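+  // Note: without FPCVT, an unsigned i32 result can still be obtained with
+  // FCTIDZ, since truncation to a signed 64-bit integer covers the entire
+  // unsigned 32-bit range.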
+ if (DstVT == MVT::i32)
+ if (IsSigned)
+ Opc = PPC::FCTIWZ;
+ else
+ Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ else
+ Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Now move the integer value from a float register to an integer register.
+ unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+ if (IntReg == 0)
+ return false;
+
+ updateValueMap(I, IntReg);
+ return true;
+}
+
+// Attempt to fast-select a binary integer operation that isn't already
+// handled automatically.
+bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. If there is no register,
+ // make a conservative choice (don't assign R0).
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ &PPC::GPRC_and_GPRC_NOR0RegClass);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
+ break;
+ case ISD::OR:
+ Opc = IsGPRC ? PPC::OR : PPC::OR8;
+ break;
+ case ISD::SUB:
+ Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
+ break;
+ }
+
+ unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // Handle case of small immediate operand.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ const APInt &CIVal = ConstInt->getValue();
+ int Imm = (int)CIVal.getSExtValue();
+ bool UseImm = true;
+ if (isInt<16>(Imm)) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Missing case!");
+ case PPC::ADD4:
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ break;
+ case PPC::ADD8:
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ break;
+ case PPC::OR:
+ Opc = PPC::ORI;
+ break;
+ case PPC::OR8:
+ Opc = PPC::ORI8;
+ break;
+ case PPC::SUBF:
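+      // Subtract-from with an immediate becomes an add of the negated
+      // immediate, except for -32768, whose negation does not fit in a
+      // signed 16-bit field.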
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ case PPC::SUBF8:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ }
+
+ if (UseImm) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg)
+ .addReg(SrcReg1)
+ .addImm(Imm);
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ // Reg-reg case.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ // Reverse operands for subtract-from.
+ if (ISDOpcode == ISD::SUB)
+ std::swap(SrcReg1, SrcReg2);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Handle arguments to a call that we're attempting to fast-select.
+// Return false if the arguments are too complex for us at the moment.
+bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
+
+ // Reserve space for the linkage area on the stack.
+ unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
+ CCInfo.AllocateStack(LinkageSize, 8);
+
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
+
+ // Bail out if we can't handle any of the arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Skip vector arguments for now, as well as long double and
+ // uint128_t, and anything that isn't passed in a register.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
+ !VA.isRegLoc() || VA.needsCustom())
+ return false;
+
+ // Skip bit-converted arguments for now.
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ return false;
+ }
+
+ // Get a count of how many bytes are to be pushed onto the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
+ NumBytes = std::max(NumBytes, LinkageSize + 64);
+
+ // Issue CALLSEQ_START.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TII.getCallFrameSetupOpcode()))
+ .addImm(NumBytes);
+
+ // Prepare to assign register arguments. Every argument uses up a
+ // GPR protocol register even if it's passed in a floating-point
+ // register (unless we're using the fast calling convention).
+ unsigned NextGPR = PPC::X3;
+ unsigned NextFPR = PPC::F1;
+
+ // Process arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Handle argument promotion and bitcasts.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
+ llvm_unreachable("Failed to emit a sext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
+ llvm_unreachable("Failed to emit a zext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ // FIXME: Not yet handled.
+ llvm_unreachable("Should have bailed before getting here!");
+ break;
+ }
+ }
+
+ // Copy this argument to the appropriate register.
+ unsigned ArgReg;
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
+ ArgReg = NextFPR++;
+ if (CC != CallingConv::Fast)
+ ++NextGPR;
+ } else
+ ArgReg = NextGPR++;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
+ RegArgs.push_back(ArgReg);
+ }
+
+ return true;
+}
+
+// For a call that we've determined we can fast-select, finish the
+// call sequence and generate a copy to obtain the return value (if any).
+bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+
+  // Issue CALLSEQ_END.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TII.getCallFrameDestroyOpcode()))
+ .addImm(NumBytes).addImm(0);
+
+ // Next, generate a copy to obtain the return value.
+ // FIXME: No multi-register return values yet, though I don't foresee
+ // any real difficulties there.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ CCValAssign &VA = RVLocs[0];
+ assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ MVT DestVT = VA.getValVT();
+ MVT CopyVT = DestVT;
+
+ // Ints smaller than a register still arrive in a full 64-bit
+ // register, so make sure we recognize this.
+ if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
+ CopyVT = MVT::i64;
+
+ unsigned SourcePhysReg = VA.getLocReg();
+ unsigned ResultReg = 0;
+
+ if (RetVT == CopyVT) {
+ const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
+ ResultReg = createResultReg(CpyRC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+
+ // If necessary, round the floating result to single precision.
+ } else if (CopyVT == MVT::f64) {
+ ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
+ ResultReg).addReg(SourcePhysReg);
+
+ // If only the low half of a general register is needed, generate
+ // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
+ // used along the fast-isel path (not lowered), and downstream logic
+ // also doesn't like a direct subreg copy on a physical reg.)
+ } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
+ ResultReg = createResultReg(&PPC::GPRCRegClass);
+ // Convert physical register from G8RC to GPRC.
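+    // The Xn and Rn register enums are laid out in parallel, so subtracting
+    // the fixed distance between X0 and R0 maps a 64-bit GPR to its 32-bit
+    // counterpart.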
+ SourcePhysReg -= PPC::X0 - PPC::R0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+ }
+
+ assert(ResultReg && "ResultReg unset!");
+ CLI.InRegs.push_back(SourcePhysReg);
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = 1;
+ }
+
+ return true;
+}
+
+bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ const MCSymbol *Symbol = CLI.Symbol;
+
+ if (!Callee && !Symbol)
+ return false;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (IsVarArg)
+ return false;
+
+ // Handle simple calls for now, with legal return types and
+ // those that can be extended.
+ Type *RetTy = CLI.RetTy;
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8)
+ return false;
+ else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ // We can't handle boolean returns when CR bits are in use.
+ return false;
+
+ // FIXME: No multi-register return values yet.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
+ RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
+ RetVT != MVT::f64) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ if (RVLocs.size() > 1)
+ return false;
+ }
+
+ // Bail early if more than 8 arguments, as we only currently
+ // handle arguments passed in registers.
+ unsigned NumArgs = CLI.OutVals.size();
+ if (NumArgs > 8)
+ return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+
+ Args.reserve(NumArgs);
+ ArgRegs.reserve(NumArgs);
+ ArgVTs.reserve(NumArgs);
+ ArgFlags.reserve(NumArgs);
+
+ for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
+ // Only handle easy calls for now. It would be reasonably easy
+ // to handle <= 8-byte structures passed ByVal in registers, but we
+ // have to ensure they are right-justified in the register.
+ ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
+ if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
+ return false;
+
+ Value *ArgValue = CLI.OutVals[i];
+ Type *ArgTy = ArgValue->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
+ return false;
+
+ if (ArgVT.isVector())
+ return false;
+
+ unsigned Arg = getRegForValue(ArgValue);
+ if (Arg == 0)
+ return false;
+
+ Args.push_back(ArgValue);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Process the arguments.
+ SmallVector<unsigned, 8> RegArgs;
+ unsigned NumBytes;
+
+ if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, IsVarArg))
+ return false;
+
+ MachineInstrBuilder MIB;
+ // FIXME: No handling for function pointers yet. This requires
+ // implementing the function descriptor (OPD) setup.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV) {
+    // Patchpoints are a special case; they always dispatch to a pointer value.
+ // However, we don't actually want to generate the indirect call sequence
+ // here (that will be generated, as necessary, during asm printing), and
+ // the call we generate here will be erased by FastISel::selectPatchpoint,
+ // so don't try very hard...
+ if (CLI.IsPatchPoint)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
+ else
+ return false;
+ } else {
+ // Build direct call with NOP for TOC restore.
+ // FIXME: We can and should optimize away the NOP for local calls.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::BL8_NOP));
+ // Add callee.
+ MIB.addGlobalAddress(GV);
+ }
+
+ // Add implicit physical register uses to the call.
+ for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
+ MIB.addReg(RegArgs[II], RegState::Implicit);
+
+ // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+ // into the call.
+ PPCFuncInfo->setUsesTOCBasePtr();
+ MIB.addReg(PPC::X2, RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers. Proper
+ // defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
+
+ CLI.Call = MIB;
+
+ // Finish off the call including any return values.
+ return finishCall(RetVT, CLI, NumBytes);
+}
+
+// Attempt to fast-select a return instruction.
+bool PPCFastISel::SelectRet(const Instruction *I) {
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+ CallingConv::ID CC = F.getCallingConv();
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
+ const Value *RV = Ret->getOperand(0);
+
+ // FIXME: Only one output register for now.
+ if (ValLocs.size() > 1)
+ return false;
+
+ // Special case for returning a constant integer of any size - materialize
+ // the constant as an i64 and copy it to the return register.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
+ CCValAssign &VA = ValLocs[0];
+
+ unsigned RetReg = VA.getLocReg();
+ // We still need to worry about properly extending the sign. For example,
+ // we could have only a single bit or a constant that needs zero
+ // extension rather than sign extension. Make sure we pass the return
+ // value extension property to integer materialization.
+ unsigned SrcReg =
+ PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+
+ RetRegs.push_back(RetReg);
+
+ } else {
+ unsigned Reg = getRegForValue(RV);
+
+ if (Reg == 0)
+ return false;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i < ValLocs.size(); ++i) {
+
+ CCValAssign &VA = ValLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ RetRegs.push_back(VA.getLocReg());
+ unsigned SrcReg = Reg + VA.getValNo();
+
+ EVT RVEVT = TLI.getValueType(DL, RV->getType());
+ if (!RVEVT.isSimple())
+ return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getLocVT();
+
+ if (RVVT != DestVT && RVVT != MVT::i8 &&
+ RVVT != MVT::i16 && RVVT != MVT::i32)
+ return false;
+
+ if (RVVT != DestVT) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ llvm_unreachable("Full value assign but types don't match?");
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ case CCValAssign::SExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ }
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetRegs[i])
+ .addReg(SrcReg);
+ }
+ }
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::BLR8));
+
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+
+ return true;
+}
+
+// Attempt to emit an integer extend of SrcReg into DestReg. Both
+// signed and zero extensions are supported. Return false if we
+// can't handle it.
+bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i64)
+ return false;
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
+ return false;
+
+ // Signed extensions use EXTSB, EXTSH, EXTSW.
+ if (!IsZExt) {
+ unsigned Opc;
+ if (SrcVT == MVT::i8)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
+ else if (SrcVT == MVT::i16)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
+ else {
+ assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
+ Opc = PPC::EXTSW_32_64;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Unsigned 32-bit extensions use RLWINM.
+ } else if (DestVT == MVT::i32) {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 24;
+ else {
+ assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
+ MB = 16;
+ }
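+    // With SH=0 and ME=31, RLWINM keeps bits MB..31 (the low 32-MB bits)
+    // and clears everything above them.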
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
+ DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
+
+ // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
+ } else {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 56;
+ else if (SrcVT == MVT::i16)
+ MB = 48;
+ else
+ MB = 32;
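+    // With SH=0, RLDICL clears the high MB bits and keeps the low 64-MB bits.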
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::RLDICL_32_64), DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select an indirect branch instruction.
+bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0)
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
+ .addReg(AddrReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (const BasicBlock *SuccBB : IB->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
+
+ return true;
+}
+
+// Attempt to fast-select an integer truncate instruction.
+bool PPCFastISel::SelectTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
+ EVT DestVT = TLI.getValueType(DL, I->getType(), true);
+
+ if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
+ return false;
+
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // The only interesting case is when we need to switch register classes.
+ if (SrcVT == MVT::i64) {
+ unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(SrcReg, 0, PPC::sub_32);
+ SrcReg = ResultReg;
+ }
+
+ updateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select an integer extend instruction.
+bool PPCFastISel::SelectIntExt(const Instruction *I) {
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool IsZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(DL, SrcTy, true);
+ DestEVT = TLI.getValueType(DL, DestTy, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ if (!DestEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+
+ // If we know the register class needed for the result of this
+ // instruction, use it. Otherwise pick the register class of the
+ // correct size that does not contain X0/R0, since we don't know
+ // whether downstream uses permit that assignment.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass));
+ unsigned ResultReg = createResultReg(RC);
+
+ if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Attempt to fast-select an instruction that wasn't handled by
+// the table-generated machinery.
+bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*IsSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*IsSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*IsSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*IsSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::Call:
+ return selectCall(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
+ // Here add other flavors of Instruction::XXX that automated
+ // cases don't catch. For example, switches are terminators
+ // that aren't yet handled.
+ default:
+ break;
+ }
+ return false;
+}
+
+// Materialize a floating-point constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
+ // No plans to handle long double here.
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return 0;
+
+ // All FP constants are loaded from the constant pool.
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
+ assert(Align > 0 && "Unexpectedly missing alignment information!");
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(*FuncInfo.MF),
+ MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
+
+ unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+ unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ PPCFuncInfo->setUsesTOCBasePtr();
+ // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
+ TmpReg)
+ .addConstantPoolIndex(Idx).addReg(PPC::X2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg).addMemOperand(MMO);
+ } else {
+ // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
+ // But for large code model, we must generate a LDtocL followed
+ // by the LF[SD].
+ if (CModel == CodeModel::Large) {
+ unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
+ TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg2);
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
+ .addReg(TmpReg)
+ .addMemOperand(MMO);
+ }
+
+ return DestReg;
+}
+
+// Materialize the address of a global value into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+ assert(VT == MVT::i64 && "Non-address!");
+ const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
+ unsigned DestReg = createResultReg(RC);
+
+ // Global values may be plain old object addresses, TLS object
+ // addresses, constant pool entries, or jump tables. How we generate
+ // code for these may depend on small, medium, or large code model.
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ // FIXME: Jump tables are not yet required because fast-isel doesn't
+ // handle switches; if that changes, we need them as well. For now,
+ // what follows assumes everything's a generic (or TLS) global address.
+
+ // FIXME: We don't yet handle the complexity of TLS.
+ if (GV->isThreadLocal())
+ return 0;
+
+ PPCFuncInfo->setUsesTOCBasePtr();
+ // For small code model, generate a simple TOC load.
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
+ DestReg)
+ .addGlobalAddress(GV)
+ .addReg(PPC::X2);
+ else {
+ // If the address is an externally defined symbol, a symbol with common
+ // or externally available linkage, a non-local function address, or a
+ // jump table address (not yet needed), or if we are generating code
+ // for large code model, we generate:
+ // LDtocL(GV, ADDIStocHA(%X2, GV))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, GV), GV)
+ // Either way, start with the ADDIStocHA:
+ unsigned HighPartReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
+
+ unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
+ if (GVFlags & PPCII::MO_NLP_FLAG) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
+ DestReg).addGlobalAddress(GV).addReg(HighPartReg);
+ } else {
+ // Otherwise generate the ADDItocL.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
+ DestReg).addReg(HighPartReg).addGlobalAddress(GV);
+ }
+ }
+
+ return DestReg;
+}
+
+// Materialize a 32-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ unsigned ResultReg = createResultReg(RC);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ if (isInt<16>(Imm))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
+ .addImm(Imm);
+ else if (Lo) {
+ // Both Lo and Hi have nonzero bits.
+ unsigned TmpReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
+ .addImm(Hi);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
+ .addReg(TmpReg).addImm(Lo);
+ } else
+ // Just Hi bits.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
+ .addImm(Hi);
+
+ return ResultReg;
+}
+
+// Materialize a 64-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned Remainder = 0;
+ unsigned Shift = 0;
+
+ // If the value doesn't fit in 32 bits, see if we can shift it
+ // so that it fits in 32 bits.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ if (isInt<32>(ImmSh))
+ Imm = ImmSh;
+ else {
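+      // Shifting alone cannot make the value fit; materialize the high
+      // 32 bits here and OR the low 32 bits (kept in Remainder) back in below.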
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Handle the high-order 32 bits (if shifted) or the whole 32 bits
+ // (if not shifted).
+ unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
+ if (!Shift)
+ return TmpReg1;
+
+ // If upper 32 bits were not zero, we've built them and need to shift
+ // them into place.
+ unsigned TmpReg2;
+ if (Imm) {
+ TmpReg2 = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
+ TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
+ } else
+ TmpReg2 = TmpReg1;
+
+ unsigned TmpReg3, Hi, Lo;
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ TmpReg3 = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
+ TmpReg3).addReg(TmpReg2).addImm(Hi);
+ } else
+ TmpReg3 = TmpReg2;
+
+ if ((Lo = Remainder & 0xFFFF)) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
+ ResultReg).addReg(TmpReg3).addImm(Lo);
+ return ResultReg;
+ }
+
+ return TmpReg3;
+}
+
+
+// Materialize an integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
+ bool UseSExt) {
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+
+ // If the constant is in range, use a load-immediate.
+ if (UseSExt && isInt<16>(CI->getSExtValue())) {
+ unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
+ unsigned ImmReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
+ .addImm(CI->getSExtValue());
+ return ImmReg;
+ } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) {
+ unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
+ unsigned ImmReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
+ .addImm(CI->getZExtValue());
+ return ImmReg;
+ }
+
+ // Construct the constant piecewise.
+ int64_t Imm = CI->getZExtValue();
+
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else if (VT == MVT::i32)
+ return PPCMaterialize32BitInt(Imm, RC);
+
+ return 0;
+}
+
+// Materialize a constant into a register, and return the register
+// number (or zero if we failed to handle it).
+unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return PPCMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return PPCMaterializeGV(GV, VT);
+ else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return PPCMaterializeInt(CI, VT, VT != MVT::i1);
+
+ return 0;
+}
+
+// Materialize the address created by an alloca into a register, and
+// return the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(SI->second).addImm(0);
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+// Fold loads into extends when possible.
+// FIXME: We can have multiple redundant extend/trunc instructions
+// following a load. The folding only picks up one. Extend this
+// to check subsequent instructions for the same pattern and remove
+// them. Thus ResultReg should be the def reg for the last redundant
+// instruction in a chain, and all intervening instructions can be
+// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
+// to add ELF64-NOT: rldicl to the appropriate tests when this works.
+bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ bool IsZExt = false;
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+
+ case PPC::RLDICL:
+ case PPC::RLDICL_32_64: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 56) ||
+ (VT == MVT::i16 && MB <= 48) ||
+ (VT == MVT::i32 && MB <= 32))
+ break;
+ return false;
+ }
+
+ case PPC::RLWINM:
+ case PPC::RLWINM8: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 24) ||
+ (VT == MVT::i16 && MB <= 16))
+ break;
+ return false;
+ }
+
+ case PPC::EXTSB:
+ case PPC::EXTSB8:
+ case PPC::EXTSB8_32_64:
+ /* There is no sign-extending load-byte instruction. */
+ return false;
+
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64: {
+ if (VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64: {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+ }
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(LI->getOperand(0), Addr))
+ return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+
+ if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
+ return false;
+
+ MI->eraseFromParent();
+ return true;
+}
+
+// Attempt to lower call arguments in a faster way than done by
+// the selection DAG code.
+bool PPCFastISel::fastLowerArguments() {
+ // Defer to normal argument lowering for now. It's reasonably
+ // efficient. Consider doing something like ARM to handle the
+ // case where all args fit in registers, no varargs, no float
+ // or vector args.
+ return false;
+}
+
+// Handle materializing integer constants into a register. This is not
+// automatically generated for PowerPC, so must be explicitly created here.
+unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+
+ if (Opc != ISD::Constant)
+ return 0;
+
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else
+ return PPCMaterialize32BitInt(Imm, RC);
+}
+
+// Override for ADDI and ADDI8 to set the correct register class
+// on RHS operand 0. The automatic infrastructure naively assumes
+// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
+// for these cases. At the moment, none of the other automatically
+// generated RI instructions require special treatment. However, once
+// SelectSelect is implemented, "isel" requires similar handling.
+//
+// Also be conservative about the output register class. Avoid
+// assigning R0 or X0 to the output register for GPRC and G8RC
+// register classes, as any such result could be used in ADDI, etc.,
+// where those regs have another meaning.
+unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ if (MachineInstOpcode == PPC::ADDI)
+ MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ else if (MachineInstOpcode == PPC::ADDI8)
+ MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
+ Op0, Op0IsKill, Imm);
+}
+
+// Override for instructions with one register operand to avoid use of
+// R0/X0. The automatic infrastructure isn't aware of the context so
+// we must be conservative.
+unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+}
+
+// Override for instructions with two register operands to avoid use
+// of R0/X0. The automatic infrastructure isn't aware of the context
+// so we must be conservative.
+unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ Op1, Op1IsKill);
+}
+
+namespace llvm {
+ // Create the fast instruction selector for PowerPC64 ELF.
+ FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) {
+ // Only available on 64-bit ELF for now.
+ const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
+ return new PPCFastISel(FuncInfo, LibInfo);
+ return nullptr;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 0000000..beab844
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,1780 @@
+//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const MCPhysReg VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? 16 : 8;
+ // SVR4 ABI:
+ return STI.isPPC64() ? 16 : 4;
+}
+
+static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
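+  // ELFv1 keeps the TOC save slot at SP+40; ELFv2 shrinks the linkage area,
+  // which moves the slot to SP+24.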
+ return STI.isELFv2ABI() ? 24 : 40;
+}
+
+static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return STI.isPPC64() ? -8U : -4U;
+}
+
+static unsigned computeLinkageSize(const PPCSubtarget &STI) {
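+  // Darwin and 64-bit ELFv1 reserve six slots (words or doublewords) for the
+  // linkage area; ELFv2 drops two reserved slots, leaving four doublewords.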
+ if (STI.isDarwinABI() || STI.isPPC64())
+ return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+}
+
+static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isDarwinABI())
+ return STI.isPPC64() ? -16U : -8U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return STI.isPPC64()
+ ? -16U
+ : (STI.getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ ? -12U
+ : -8U;
+}
+
+PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ STI.getPlatformStackAlignment(), 0),
+ Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
+ TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
+ FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
+ LinkageSize(computeLinkageSize(Subtarget)),
+ BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
+
+// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
+ unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset. We map each of the nonvolatile CR fields
+ // to the slot for CR2, which is the first of the nonvolatile CR
+ // fields to be assigned, so that we only allocate one save slot.
+ // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+ {PPC::CR2, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+}
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+/// FIXME: The removal of the code results in a compile failure at -O0 when the
+/// function contains a function call, as the GPR containing original VRSAVE
+/// contents is spilled and reloaded around the call. Without the prolog code,
+/// the spill instruction refers to an undefined register. This code needs
+/// to account for all uses of that GPR.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+    // If the block ends in a return, look for the MTVRSAVE to remove.
+ if (I->isReturnBlock()) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MRI.isPhysRegModified(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+ // Live in and live out values already must be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = TRI->getEncodingValue(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (!MBB.isReturnBlock())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = TRI->getEncodingValue(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
+ }
+
+ // If no registers are used, turn this into a copy.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
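+  // If the mask fits entirely in the low or high halfword, a single ORI or
+  // ORIS suffices; otherwise build the mask with an ORIS followed by an ORI.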
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+static bool spillsVRSAVE(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isVRSAVESpilled();
+}
+
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
+
+ // Get stack alignments. The frame must be aligned to the greatest of these:
+ unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
+ unsigned MaxAlign = MFI->getMaxAlignment(); // alignment required by data in frame
+ unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
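+ // AlignMask is used below to round both the maximum call frame size and the
+ // total frame size up to this alignment.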
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+
+ // If this is a leaf function that uses no more than 224 bytes of stack space,
+ // has no frame pointer, no calls, and no dynamic allocas, then we do not need
+ // to adjust the stack pointer (we fit in the Red Zone).
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
+ bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+ unsigned LR = RegInfo->getRARegister();
+ if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !MustSaveLR(MF, LR) &&
+ !RegInfo->hasBasePointer(MF)) { // No special alignment.
+ // No need for frame
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage area.
+ unsigned minCallFrameSize = getLinkageSize();
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
+}
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // FIXME: This is pretty much broken by design: hasFP() might be called really
+ // early, before the stack layout has been calculated, and thus might return
+ // true or false here depending on the time of the call.
+ return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Naked functions have no stack frame pushed, so we don't have a frame
+ // pointer.
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ return false;
+
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ bool HasBP = RegInfo->hasBasePointer(MF);
+ unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
+ unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ case PPC::BP:
+ MO.setReg(BPReg);
+ break;
+ case PPC::BP8:
+ MO.setReg(BP8Reg);
+ break;
+
+ }
+ }
+ }
+}
+
+bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
+ bool UseAtEnd,
+ unsigned *ScratchRegister) const {
+ RegScavenger RS;
+ unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
+
+ if (ScratchRegister)
+ *ScratchRegister = R0;
+
+ // If MBB is an entry or exit block, use R0 as the scratch register
+ if ((UseAtEnd && MBB->isReturnBlock()) ||
+ (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
+ return true;
+
+ RS.enterBasicBlock(MBB);
+
+ if (UseAtEnd && !MBB->empty()) {
+ // The scratch register will be used at the end of the block, so we must
+ // consider all registers used within the block.
+
+ MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
+ // If no terminator, back iterator up to previous instruction.
+ if (MBBI == MBB->end())
+ MBBI = std::prev(MBBI);
+
+ if (MBBI != MBB->begin())
+ RS.forward(MBBI);
+ }
+
+ if (!RS.isRegUsed(R0))
+ return true;
+
+ unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass);
+
+ // Make sure the register scavenger was able to find an available register.
+ // If not, return false to indicate that no register was available; R0 (set
+ // above as the default) must then be used, as recommended by the ABI.
+ if (Reg == 0)
+ return false;
+
+ if (ScratchRegister)
+ *ScratchRegister = Reg;
+
+ return true;
+}
+
+bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+
+ return findScratchRegister(TmpMBB, false, nullptr);
+}
+
+bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+
+ return findScratchRegister(TmpMBB, true, nullptr);
+}
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ DebugLoc dl;
+ bool needsCFI = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get the ABI.
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
+ "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ if (!isSVR4ABI)
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the prologue block.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ unsigned FrameSize = determineFrameLayout(MF);
+ int NegFrameSize = -FrameSize;
+ if (!isInt<32>(NegFrameSize))
+ llvm_unreachable("Unhandled stack size!");
+
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ // Do we have a frame pointer and/or base pointer for this function?
+ bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
+
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned ScratchReg = 0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
+ const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
+ : PPC::MFLR );
+ const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
+ : PPC::STW );
+ const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
+ : PPC::STWU );
+ const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
+ : PPC::STWUX);
+ const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
+ : PPC::OR );
+ const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
+ : PPC::SUBFC);
+ const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
+ : PPC::SUBFIC);
+
+ // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
+ // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
+ // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
+ // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
+ assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
+ "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
+
+ findScratchRegister(&MBB, false, &ScratchReg);
+ assert(ScratchReg && "No scratch register!");
+
+ int LROffset = getReturnSaveOffset();
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = getFramePointerSaveOffset();
+ }
+ }
+
+ int BPOffset = 0;
+ if (HasBP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset = getBasePointerSaveOffset();
+ }
+ }
+
+ int PBPOffset = 0;
+ if (FI->usesPICBase()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int PBPIndex = FI->getPICBasePointerSaveIndex();
+ assert(PBPIndex && "No PIC Base Pointer Save Slot!");
+ PBPOffset = FFI->getObjectOffset(PBPIndex);
+ }
+
+ // Get stack alignments.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (HasBP && MaxAlign > 1)
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
+ bool isLargeFrame = !isInt<16>(NegFrameSize);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
+
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Prologue CR saving supported only in 64-bit mode");
+
+ if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
+ // If only one or two CR fields are clobbered, it could be more
+ // efficient to use mfocrf to selectively save just those fields.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
+ }
+
+ if (HasFP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
+
+ if (FI->usesPICBase())
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(PPC::R30)
+ .addImm(PBPOffset)
+ .addReg(SPReg);
+
+ if (HasBP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
+
+ if (MustSaveLR)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(8)
+ .addReg(SPReg);
+
+ // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
+ if (!FrameSize) return;
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+
+ if (HasBP) {
+ // Save a copy of r1 as the base pointer.
+ BuildMI(MBB, MBBI, dl, OrInst, BPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+ }
+
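+ // The realignment below extracts the misalignment of the incoming SP (its
+ // low log2(MaxAlign) bits), folds it into the frame size, and then uses
+ // STWUX/STDUX so that a single instruction both stores the back chain and
+ // produces an SP aligned to MaxAlign.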
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
+ }
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
+
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ }
+
+ // Add Call Frame Information for the instructions we generated above.
+ if (needsCFI) {
+ unsigned CFIIndex;
+
+ if (HasBP) {
+ // Define CFA in terms of BP. Do this in preference to using FP/SP,
+ // because if the stack needed aligning then CFA won't be at a fixed
+ // offset from FP/SP.
+ unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ } else {
+ // Adjust the definition of CFA to account for the change in SP.
+ assert(NegFrameSize);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ if (HasFP) {
+ // Describe where FP was saved, at a fixed offset from CFA.
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
+ if (FI->usesPICBase()) {
+ // Describe where FP was saved, at a fixed offset from CFA.
+ unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
+ if (HasBP) {
+ // Describe where BP was saved, at a fixed offset from CFA.
+ unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
+ if (MustSaveLR) {
+ // Describe where LR was saved, at a fixed offset from CFA.
+ unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ BuildMI(MBB, MBBI, dl, OrInst, FPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+
+ if (!HasBP && needsCFI) {
+ // Change the definition of CFA from SP+offset to FP+offset, because SP
+ // will change at every alloca.
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+
+ if (needsCFI) {
+ // Describe where callee saved registers were saved, at fixed offsets from
+ // CFA.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+
+ // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
+ // subregisters of CR2. We just need to emit a move of CR2.
+ if (PPC::CRBITRCRegClass.contains(Reg))
+ continue;
+
+ // For SVR4, don't emit a move for the CR spill slot if we haven't
+ // spilled CRs.
+ if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
+ && MustSaveCRs.empty())
+ continue;
+
+ // For 64-bit SVR4 when we have spilled CRs, the spill location
+ // is SP+8, not a frame-relative slot.
+ if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
+ // the whole CR word. In the ELFv2 ABI, every CR that was
+ // actually saved gets its own CFI record.
+ unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ continue;
+ }
+
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl;
+
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+
+ // Get alignment info so we know how to restore the SP.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get the ABI.
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ // Do we have a frame pointer and/or base pointer for this function?
+ bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
+
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned ScratchReg = 0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
+ : PPC::MTLR );
+ const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
+ : PPC::LWZ );
+ const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
+ : PPC::ADDI );
+ const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
+ : PPC::ADD4 );
+
+ int LROffset = getReturnSaveOffset();
+
+ int FPOffset = 0;
+
+ findScratchRegister(&MBB, true, &ScratchReg);
+ assert(ScratchReg && "No scratch register!");
+
+ if (HasFP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = getFramePointerSaveOffset();
+ }
+ }
+
+ int BPOffset = 0;
+ if (HasBP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset = getBasePointerSaveOffset();
+ }
+ }
+
+ int PBPOffset = 0;
+ if (FI->usesPICBase()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int PBPIndex = FI->getPICBasePointerSaveIndex();
+ assert(PBPIndex && "No PIC Base Pointer Save Slot!");
+ PBPOffset = FFI->getObjectOffset(PBPIndex);
+ }
+
+ bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
+
+ if (IsReturnBlock) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be non-negative");
+ if (MaxTCRetDelta > 0)
+ FrameSize += (StackAdj + Delta);
+ else
+ FrameSize += StackAdj;
+ }
+ }
+
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple LD/LWZ immediate offset operand.
+ bool isLargeFrame = !isInt<16>(FrameSize);
+
+ if (FrameSize) {
+ // In the prologue, the loaded (or persistent) stack pointer value is offset
+ // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now.
+
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall()) {
+ assert(HasFP && "Expecting a valid frame pointer.");
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(FPReg).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
+ .addReg(FPReg)
+ .addReg(ScratchReg);
+ }
+ } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg)
+ .addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
+ .addImm(0)
+ .addReg(SPReg);
+ }
+ }
+
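+ // The stack pointer now points at the caller's frame again, so the saved LR,
+ // CR, FP, PIC base, and BP can be reloaded at fixed offsets from SP.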
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
+
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Epilogue CR restoring supported only in 64-bit mode");
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
+ .addImm(8)
+ .addReg(SPReg);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
+
+ if (FI->usesPICBase())
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, LoadInst)
+ .addReg(PPC::R30)
+ .addImm(PBPOffset)
+ .addReg(SPReg);
+
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ .addReg(TempReg, getKillRegState(i == e-1));
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+
+ // Callee-pop calling convention. Pop the parameter/linkage area. Used for
+ // tail call optimization.
+ if (IsReturnBlock) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
+ .addReg(FPReg)
+ .addReg(ScratchReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+ }
+}
+
+void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = RegInfo->getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ SavedRegs.reset(LR);
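+ // LR is saved and restored explicitly in emitPrologue/emitEpilogue, so it is
+ // excluded from the generic callee-saved spill code.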
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && needsFP(MF)) {
+ // Find out what the fixed offset of the frame pointer save area is.
+ int FPOffset = getFramePointerSaveOffset();
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ int BPSI = FI->getBasePointerSaveIndex();
+ if (!BPSI && RegInfo->hasBasePointer(MF)) {
+ int BPOffset = getBasePointerSaveOffset();
+ // Allocate the frame index for the base pointer save area.
+ BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
+ // Save the result.
+ FI->setBasePointerSaveIndex(BPSI);
+ }
+
+ // Reserve stack space for the PIC Base register (R30).
+ // Only used in SVR4 32-bit.
+ if (FI->usesPICBase()) {
+ int PBPSI = MFI->CreateFixedObject(4, -8, true);
+ FI->setPICBasePointerSaveIndex(PBPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ }
+
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (SavedRegs.test(PPC::CR2) ||
+ SavedRegs.test(PPC::CR3) ||
+ SavedRegs.test(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
+ return;
+ }
+
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
+ return;
+ }
+
+ unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
+ unsigned MinFPR = PPC::F31;
+ unsigned MinVR = PPC::V31;
+
+ bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
+ bool HasFPSaveArea = false;
+ bool HasVRSAVESaveArea = false;
+ bool HasVRSaveArea = false;
+
+ SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
+ SmallVector<CalleeSavedInfo, 18> FPRegs;
+ SmallVector<CalleeSavedInfo, 18> VRegs;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (PPC::GPRCRegClass.contains(Reg)) {
+ HasGPSaveArea = true;
+
+ GPRegs.push_back(CSI[i]);
+
+ if (Reg < MinGPR) {
+ MinGPR = Reg;
+ }
+ } else if (PPC::G8RCRegClass.contains(Reg)) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
+ } else if (PPC::F8RCRegClass.contains(Reg)) {
+ HasFPSaveArea = true;
+
+ FPRegs.push_back(CSI[i]);
+
+ if (Reg < MinFPR) {
+ MinFPR = Reg;
+ }
+ } else if (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)) {
+ ; // do nothing, as we already know whether CRs are spilled
+ } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
+ HasVRSAVESaveArea = true;
+ } else if (PPC::VRRCRegClass.contains(Reg)) {
+ HasVRSaveArea = true;
+
+ VRegs.push_back(CSI[i]);
+
+ if (Reg < MinVR) {
+ MinVR = Reg;
+ }
+ } else {
+ llvm_unreachable("Unknown RegisterClass!");
+ }
+ }
+
+ PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
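+ // LowerBound is the running offset (zero or negative, relative to the
+ // incoming stack pointer) at which the next save area will be placed as the
+ // areas are laid out from the top of the frame downward.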
+ int64_t LowerBound = 0;
+
+ // Take into account stack space reserved for tail calls.
+ int TCSPDelta = 0;
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ LowerBound = TCSPDelta;
+ }
+
+ // The Floating-point register save area is right below the back chain word
+ // of the previous stack frame.
+ if (HasFPSaveArea) {
+ for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+ int FI = FPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
+ }
+
+ // Check whether the frame pointer register is allocated. If so, make sure it
+ // is spilled to the correct offset.
+ if (needsFP(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getFramePointerSaveIndex();
+ assert(FI && "No Frame Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ if (PFI->usesPICBase()) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getPICBasePointerSaveIndex();
+ assert(FI && "No PIC Base Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ if (RegInfo->hasBasePointer(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getBasePointerSaveIndex();
+ assert(FI && "No Base Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // General register save area starts right below the Floating-point
+ // register save area.
+ if (HasGPSaveArea || HasG8SaveArea) {
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+ int FI = GPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // Likewise, move the 64-bit register save area spill slots down, taking into
+ // account the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg =
+ std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+ TRI->getEncodingValue(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
+ }
+
+ // For 32-bit only, the CR save area is below the general register
+ // save area. For 64-bit SVR4, the CR save area is addressed relative
+ // to the stack pointer and hence does not need an adjustment here.
+ // Only CR2 (the first nonvolatile spilled) has an associated frame
+ // index so that we have a single uniform save area.
+ if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
+ // Adjust the frame index of the CR spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
+ // Leave Darwin logic as-is.
+ || (!Subtarget.isSVR4ABI() &&
+ (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)))) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The CR save area is always 4 bytes long.
+ }
+
+ if (HasVRSAVESaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the VRSAVE register class?
+ // Adjust the frame index of the VRSAVE spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::VRSAVERCRegClass.contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+ }
+
+ if (HasVRSaveArea) {
+ // Insert alignment padding; we need 16-byte alignment.
+ LowerBound = (LowerBound - 15) & ~(15);
+
+ for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+ int FI = VRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ // Might we have over-aligned allocas?
+ bool HasAlVars = MFI->hasVarSizedObjects() &&
+ MFI->getMaxAlignment() > getStackAlignment();
+
+ // These kinds of spills might need two registers.
+ if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ }
+}
+
+bool
+PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+ // Return false otherwise to maintain pre-existing behavior.
+ if (!Subtarget.isSVR4ABI())
+ return false;
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL;
+ bool CRSpilled = false;
+ MachineInstrBuilder CRMIB;
+
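+ // On 32-bit SVR4 the nonvolatile CR fields are spilled together: the first CR
+ // field encountered triggers a single MFCR into R12 (tracked by CRMIB), and
+ // later CR fields are simply appended to it as implicit kills.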
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // Only Darwin actually uses the VRSAVE register, but it can still appear
+ // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
+ // Darwin, ignore it.
+ if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ continue;
+
+ // CR2 through CR4 are the nonvolatile CR fields.
+ bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
+
+ // Add the callee-saved register as live-in; it's killed at the spill.
+ MBB.addLiveIn(Reg);
+
+ if (CRSpilled && IsCRField) {
+ CRMIB.addReg(Reg, RegState::ImplicitKill);
+ continue;
+ }
+
+ // Insert the spill to the stack frame.
+ if (IsCRField) {
+ PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
+ if (Subtarget.isPPC64()) {
+ // The actual spill will happen at the start of the prologue.
+ FuncInfo->addMustSaveCR(Reg);
+ } else {
+ CRSpilled = true;
+ FuncInfo->setSpillsCR();
+
+ // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
+ // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
+ CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
+ .addReg(Reg, RegState::ImplicitKill);
+
+ MBB.insert(MI, CRMIB);
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
+ .addReg(PPC::R12,
+ getKillRegState(true)),
+ CSI[i].getFrameIdx()));
+ }
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ }
+ return true;
+}
+
+static void
+restoreCRs(bool isPPC64, bool is31,
+ bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+ DebugLoc DL;
+ unsigned RestoreOp, MoveReg;
+
+ if (isPPC64)
+ // This is handled during epilogue generation.
+ return;
+ else {
+ // 32-bit: FP-relative
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
+ PPC::R12),
+ CSI[CSIIndex].getFrameIdx()));
+ RestoreOp = PPC::MTOCRF;
+ MoveReg = PPC::R12;
+ }
+
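+ // Move the reloaded CR word into each spilled CR field; only the last mtocrf
+ // kills R12.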
+ if (CR2Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
+ .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
+
+ if (CR3Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
+ .addReg(MoveReg, getKillRegState(!CR4Spilled)));
+
+ if (CR4Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
+ .addReg(MoveReg, getKillRegState(true)));
+}
+
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
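+ // A callee-popped amount that fits in a signed 16-bit immediate is applied
+ // with a single ADDI; larger amounts are materialized with LIS/ORI and added.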
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+bool
+PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+ // Return false otherwise to maintain pre-existing behavior.
+ if (!Subtarget.isSVR4ABI())
+ return false;
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ bool CR2Spilled = false;
+ bool CR3Spilled = false;
+ bool CR4Spilled = false;
+ unsigned CSIIndex = 0;
+
+ // Initialize insertion-point logic; we will be restoring in reverse
+ // order of spill.
+ MachineBasicBlock::iterator I = MI, BeforeI = I;
+ bool AtStart = I == MBB.begin();
+
+ if (!AtStart)
+ --BeforeI;
+
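+ // Each restore is inserted before I; at the bottom of the loop I is reset to
+ // the instruction just emitted, so the next restore lands ahead of it and the
+ // CSI entries end up in reverse order.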
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ // Only Darwin actually uses the VRSAVE register, but it can still appear
+ // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
+ // Darwin, ignore it.
+ if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ continue;
+
+ if (Reg == PPC::CR2) {
+ CR2Spilled = true;
+ // The spill slot is associated only with CR2, which is the
+ // first nonvolatile spilled. Save it here.
+ CSIIndex = i;
+ continue;
+ } else if (Reg == PPC::CR3) {
+ CR3Spilled = true;
+ continue;
+ } else if (Reg == PPC::CR4) {
+ CR4Spilled = true;
+ continue;
+ } else {
+ // When we first encounter a non-CR register after seeing at
+ // least one CR register, restore all spilled CRs together.
+ if ((CR2Spilled || CR3Spilled || CR4Spilled)
+ && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ bool is31 = needsFP(*MF);
+ restoreCRs(Subtarget.isPPC64(), is31,
+ CR2Spilled, CR3Spilled, CR4Spilled,
+ MBB, I, CSI, CSIIndex);
+ CR2Spilled = CR3Spilled = CR4Spilled = false;
+ }
+
+ // Default behavior for non-CR saves.
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ }
+
+ // Insert in reverse order.
+ if (AtStart)
+ I = MBB.begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+
+ // If we haven't yet restored the CRs, do so now.
+ if (CR2Spilled || CR3Spilled || CR4Spilled) {
+ bool is31 = needsFP(*MF);
+ restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
+ MBB, I, CSI, CSIIndex);
+ }
+
+ return true;
+}
+
+bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
+ MF.getSubtarget<PPCSubtarget>().isPPC64());
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 0000000..bbe1329
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,129 @@
+//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
+
+#include "PPC.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PPCSubtarget;
+
+class PPCFrameLowering: public TargetFrameLowering {
+ const PPCSubtarget &Subtarget;
+ const unsigned ReturnSaveOffset;
+ const unsigned TOCSaveOffset;
+ const unsigned FramePointerSaveOffset;
+ const unsigned LinkageSize;
+ const unsigned BasePointerSaveOffset;
+
+ /**
+ * \brief Find a register that can be used in function prologue and epilogue
+ *
+ * Find a register that can be used as the scratch register in the function
+ * prologue and epilogue to save various registers (Link Register, Base
+ * Pointer, etc.). Prefer R0, if it is available. If it is not available,
+ * then choose a different register.
+ *
+ * This method will return true if an available register was found (including
+ * R0). If no available registers are found, the method returns false and sets
+ * ScratchRegister to R0, as per the recommendation in the ABI.
+ *
+ * \param[in] MBB The machine basic block to find an available register for
+ * \param[in] UseAtEnd Specify whether the scratch register will be used at
+ * the end of the basic block (i.e., will the scratch
+ * register kill a register defined in the basic block)
+ * \param[out] ScratchRegister The scratch register to use
+ * \return true if a scratch register was found, false if a scratch register
+ * was not found and R0 is being used as the default.
+ */
+ bool findScratchRegister(MachineBasicBlock *MBB,
+ bool UseAtEnd,
+ unsigned *ScratchRegister) const;
+
+public:
+ PPCFrameLowering(const PPCSubtarget &STI);
+
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
+
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const override { return true; }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
+ unsigned getReturnSaveOffset() const { return ReturnSaveOffset; }
+
+ /// getTOCSaveOffset - Return the previous frame offset to save the
+ /// TOC register -- 64-bit SVR4 ABI only.
+ unsigned getTOCSaveOffset() const { return TOCSaveOffset; }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; }
+
+ /// getBasePointerSaveOffset - Return the previous frame offset to save the
+ /// base pointer.
+ unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ unsigned getLinkageSize() const { return LinkageSize; }
+
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
+ /// Methods used by shrink wrapping to determine if MBB can be used for the
+ /// function prologue/epilogue.
+ bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 0000000..7234e30
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,433 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
+ // FIXME: Move this.
+ if (isBCTRAfterSet(SU))
+ return true;
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->mayLoad())
+ return false;
+
+ // SU is a load; if any predecessor in this dispatch group is a store with
+ // which we have an ordering dependency, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || !PredMCID->mayStore())
+ continue;
+
+ if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->isBranch())
+ return false;
+
+ // SU is a branch; if any predecessor in this dispatch group sets the counter
+ // register and we have a data dependence on it, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
+ continue;
+
+ if (SU->Preds[i].isCtrl())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+// FIXME: Remove this when we don't need this:
+namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
+
+// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
+
+bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
+ unsigned &NSlots) {
+ // FIXME: Indirectly, this information is contained in the itinerary, and
+ // we should derive it from there instead of separately specifying it
+ // here.
+ unsigned IIC = MCID->getSchedClass();
+ switch (IIC) {
+ default:
+ NSlots = 1;
+ break;
+ case PPC::Sched::IIC_IntDivW:
+ case PPC::Sched::IIC_IntDivD:
+ case PPC::Sched::IIC_LdStLoadUpd:
+ case PPC::Sched::IIC_LdStLDU:
+ case PPC::Sched::IIC_LdStLFDU:
+ case PPC::Sched::IIC_LdStLFDUX:
+ case PPC::Sched::IIC_LdStLHA:
+ case PPC::Sched::IIC_LdStLHAU:
+ case PPC::Sched::IIC_LdStLWA:
+ case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTFDU:
+ NSlots = 2;
+ break;
+ case PPC::Sched::IIC_LdStLoadUpdX:
+ case PPC::Sched::IIC_LdStLDUX:
+ case PPC::Sched::IIC_LdStLHAUX:
+ case PPC::Sched::IIC_LdStLWARX:
+ case PPC::Sched::IIC_LdStLDARX:
+ case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTDCX:
+ case PPC::Sched::IIC_LdStSTWCX:
+ case PPC::Sched::IIC_BrMCRX: // mtcr
+ // FIXME: Add sync/isync (here and in the itinerary).
+ NSlots = 4;
+ break;
+ }
+
+ // FIXME: record-form instructions need a different itinerary class.
+ if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
+ NSlots = 2;
+
+ switch (IIC) {
+ default:
+ // All multi-slot instructions must come first.
+ return NSlots > 1;
+ case PPC::Sched::IIC_BrCR: // cr logicals
+ case PPC::Sched::IIC_SprMFCR:
+ case PPC::Sched::IIC_SprMFCRF:
+ case PPC::Sched::IIC_SprMTSPR:
+ return true;
+ }
+}
+
+ScheduleHazardRecognizer::HazardType
+PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (Stalls == 0 && isLoadAfterStore(SU))
+ return NoopHazard;
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ unsigned NSlots;
+ if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
+ return true;
+
+ return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
+}
+
+unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ // We only need to fill out a maximum of 5 slots here: The 6th slot could
+ // only be a second branch, and otherwise the next instruction will start a
+ // new group.
+ if (isLoadAfterStore(SU) && CurSlots < 6) {
+ unsigned Directive =
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If we're using a special group-terminating nop, then we need only one.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8 )
+ return 1;
+
+ return 5 - CurSlots;
+ }
+
+ return ScoreboardHazardRecognizer::PreEmitNoops(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID) {
+ if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
+ SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+
+ unsigned NSlots;
+ bool MustBeFirst = mustComeFirst(MCID, NSlots);
+
+ // If this instruction must come first, but does not, then it starts a
+ // new group.
+ if (MustBeFirst && CurSlots) {
+ CurSlots = CurBranches = 0;
+ CurGroup.clear();
+ }
+
+ CurSlots += NSlots;
+ CurGroup.push_back(SU);
+
+ if (MCID->isBranch())
+ ++CurBranches;
+ }
+ }
+
+ return ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
+ return ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("Bottom-up scheduling not supported");
+}
+
+void PPCDispatchGroupSBHazardRecognizer::Reset() {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ return ScoreboardHazardRecognizer::Reset();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
+ unsigned Directive =
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If the group has now filled all of its slots, or if we're using a special
+ // group-terminating nop, the group is complete.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8 || CurSlots == 6) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ CurGroup.push_back(nullptr);
+ ++CurSlots;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions. The PPC970 can dispatch a peak of four non-branch
+// instructions and one branch instruction per cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, and
+// some instructions fill an entire dispatch group. Additionally, only branches
+// can issue in the 5th (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
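+// For example (illustrative), if a dispatch group were to contain
+//   std 4, 0(3)
+//   ld  5, 0(3)
+// the load would hit the store-then-load hazard described above, so the
+// recognizer reports NoopHazard and the load is pushed into the next group.
+//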
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
+ : DAG(DAG) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DEBUG(errs() << "=== Start of dispatch group\n");
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
+ const MCInstrDesc &MCID = DAG.TII->get(Opcode);
+
+ isLoad = MCID.mayLoad();
+ isStore = MCID.mayStore();
+
+ uint64_t TSFlags = MCID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from an address previously stored
+/// to, as indicated by StoreValue/StoreOffset/StoreSize, return true.
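+/// For example (illustrative): after a store of 8 bytes at offset 16 from some
+/// base value V, a 4-byte load at offset 20 from V overlaps (16 + 8 > 20) and
+/// this returns true, while a load at offset 24 or beyond does not.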
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+    // Handle exact address matches.
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
+ return true;
+
+    // We don't have an exact match; if this is an indexed offset, see if we
+    // have overlap (which happens during fp->int conversion, for example).
+ if (StoreValue[i] == LoadValue) {
+      // Okay, the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return Hazard for any non-branch instruction that would
+/// terminate the dispatch group. We return NoopHazard for any instruction that
+/// would not terminate the dispatch group but would cause a pipeline flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: llvm_unreachable("Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+ // We can only issue a branch as the last instruction in a group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && Opcode == PPC::BCTRL)
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::Reset() {
+ EndDispatchGroup();
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 0000000..4b50214
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,102 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
+#define LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
+
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+/// PPCDispatchGroupSBHazardRecognizer - This class implements a
+/// scoreboard-based hazard recognizer for PPC out-of-order processors with
+/// dispatch-group hazards.
+class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ScheduleDAG *DAG;
+ SmallVector<SUnit *, 7> CurGroup;
+ unsigned CurSlots, CurBranches;
+
+ bool isLoadAfterStore(SUnit *SU);
+ bool isBCTRAfterSet(SUnit *SU);
+ bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
+public:
+ PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG_) :
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
+ CurSlots(0), CurBranches(0) {}
+
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ bool ShouldPreferAnother(SUnit* SU) override;
+ unsigned PreEmitNoops(SUnit *SU) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+ void Reset() override;
+ void EmitNoop() override;
+};
+
+/// PPCHazardRecognizer970 - This class defines a finite state automaton that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group,
+/// or storing then loading from the same address within a dispatch group).
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const ScheduleDAG &DAG;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+  // StoreValue/StoreOffset/StoreSize - Keep track of the address of any store.
+  // If we see a load from the same address (or one that overlaps it), insert a
+  // noop before the load. We can have up to four stores in one dispatch group,
+  // hence we track up to 4.
+  //
+  // NumStores is zero if we haven't seen a store yet. We keep track of the
+  // base value, offset, and size of each store so that overlapping accesses
+  // can be detected.
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const ScheduleDAG &DAG);
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void Reset() override;
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+                                  bool &isFirst, bool &isSingle, bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 0000000..1eaa811
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,4379 @@
+//===-- PPCISelDAGToDAG.cpp - PPC Pattern Matching Inst Selector ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-codegen"
+
+// FIXME: Remove this once the bug has been fixed!
+cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
+                          cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
+
+static cl::opt<bool>
+ UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
+ cl::desc("use aggressive ppc isel for bit permutations"),
+ cl::Hidden);
+static cl::opt<bool> BPermRewriterNoMasking(
+ "ppc-bit-perm-rewriter-stress-rotates",
+ cl::desc("stress rotate selection in aggressive ppc isel for "
+ "bit permutations"),
+ cl::Hidden);
+
+static cl::opt<bool> EnableBranchHint(
+ "ppc-use-branch-hint", cl::init(true),
+ cl::desc("Enable static hinting of branches on ppc"),
+ cl::Hidden);
+
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class PPCDAGToDAGISel : public SelectionDAGISel {
+ const PPCTargetMachine &TM;
+ const PPCSubtarget *PPCSubTarget;
+ const PPCTargetLowering *PPCLowering;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
+ PPCLowering = PPCSubTarget->getTargetLowering();
+ SelectionDAGISel::runOnMachineFunction(MF);
+
+ if (!PPCSubTarget->isSVR4ABI())
+ InsertVRSaveCode(MF);
+
+ return true;
+ }
+
+ void PreprocessISelDAG() override;
+ void PostprocessISelDAG() override;
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(
+ Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
+ }
+
+    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+    /// single rotate-and-mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ SDNode *getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N) override;
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+ SDNode *SelectBitPermutation(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Note that the operand at this point is already the
+ /// result of a prior SelectAddressRegImm call.
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ if (N.getOpcode() == ISD::TargetConstant ||
+ N.getOpcode() == ISD::TargetGlobalAddress) {
+ Out = N;
+ return true;
+ }
+
+ return false;
+ }
+
+    /// SelectAddrIdx - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation. Returns false if it can
+    /// be represented by [r+imm], which is preferred.
+ bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+    /// SelectAddrIdxOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmX4 - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement that is a multiple of 4.
+ /// Suitable for use by STD and friends.
+ bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
+ }
+
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions. It is always correct to compute the value into
+ /// a register. The case of adding a (possibly relocatable) constant to a
+ /// register can be improved, but it is wrong to substitute Reg+Reg for
+ /// Reg in an asm, because the load or store opcode would have to change.
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override {
+
+ switch(ConstraintID) {
+ default:
+ errs() << "ConstraintID: " << ConstraintID << "\n";
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_es:
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Z:
+ case InlineAsm::Constraint_Zy:
+ // We need to make sure that this one operand does not end up in r0
+ // (because we might end up lowering this as 0(%op)).
+ const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ SDLoc dl(Op);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op.getValueType(),
+ Op, RC), 0);
+
+ OutOps.push_back(NewOp);
+ return false;
+ }
+ return true;
+ }
+
+ void InsertVRSaveCode(MachineFunction &MF);
+
+ const char *getPassName() const override {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDNode *N);
+
+ void PeepholePPC64();
+ void PeepholePPC64ZExt();
+ void PeepholeCROps();
+
+ SDValue combineToCMPB(SDNode *N);
+ void foldBoolExts(SDValue &Res, SDNode *&N);
+
+ bool AllUsersSelectZero(SDNode *N);
+ void SwapAllSelectUsers(SDNode *N);
+
+ SDNode *transferMemOperands(SDNode *N, SDNode *Result);
+ };
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+ // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ bool HasVectorVReg = false;
+ for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl;
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (BB->isReturnBlock()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ const Module *M = MF->getFunction()->getParent();
+ DebugLoc dl;
+
+ if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
+ if (PPCSubTarget->isTargetELF()) {
+ GlobalBaseReg = PPC::R30;
+ if (M->getPICLevel() == PICLevel::Small) {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ } else {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::UpdateGBR), GlobalBaseReg)
+ .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ }
+ } else {
+ GlobalBaseReg =
+ RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ }
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()))
+ .getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant operand. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+ const SDValue &DestMBB) {
+ assert(isa<BasicBlockSDNode>(DestMBB));
+
+ if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
+
+ const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+ const TerminatorInst *BBTerm = BB->getTerminator();
+
+ if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
+
+ const BasicBlock *TBB = BBTerm->getSuccessor(0);
+ const BasicBlock *FBB = BBTerm->getSuccessor(1);
+
+ auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
+ auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
+
+  // We only want to handle cases that are easy to predict statically, e.g. a
+  // C++ throw statement that is very likely not taken, or a call to a function
+  // that never returns, such as stdlib exit(). So we set a Threshold to filter
+  // out the unwanted cases.
+  //
+  // Below is the LLVM branch weight table; we only want to handle cases 1 and 2:
+ //
+ // Case Taken:Nontaken Example
+ // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
+ // 2. Invoke-terminating 1:1048575
+ // 3. Coldblock 4:64 __builtin_expect
+ // 4. Loop Branch 124:4 For loop
+ // 5. PH/ZH/FPH 20:12
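+  //
+  // For example (illustrative): with the case 1 weights, the larger edge
+  // probability divided by Threshold still exceeds the tiny probability of the
+  // other edge, so a hint is emitted; with the case 4 weights (124:4), the
+  // division drops below the smaller probability and we return BR_NO_HINT.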
+ const uint32_t Threshold = 10000;
+
+ if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
+ return PPC::BR_NO_HINT;
+
+ DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
+ << BB->getName() << "'\n"
+ << " -> " << TBB->getName() << ": " << TProb << "\n"
+ << " -> " << FBB->getName() << ": " << FProb << "\n");
+
+ const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
+
+  // If the destination BasicBlock is the False-BasicBlock (FBB), swap the
+  // branch probabilities, because we want 'TProb' to stand for the probability
+  // of branching to the destination BasicBlock.
+ if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
+ std::swap(TProb, FProb);
+
+ return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node has the
+// specified opcode and an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
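+// getFrameIndex - Select a frame index (plus an optional offset) as an
+// ADDI/ADDI8 of the target frame index; when SN has a single use the node is
+// morphed in place, otherwise a new machine node is created.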
+SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
+ SDLoc dl(SN);
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (SN->hasOneUse())
+ return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
+ getSmallIPtrImm(Offset, dl));
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
+ getSmallIPtrImm(Offset, dl));
+}
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool isShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (isShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (isShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
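+/// For example (illustrative): for i32 values a and b, the OR in
+///   (a & 0xFFFFFF00) | (b & 0x000000FF)
+/// can be selected as an rlwimi with SH = 0, MB = 24, ME = 31 that inserts the
+/// low byte of the right-hand operand into the left-hand one.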
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->computeKnownBits(Op0, LKZ, LKO);
+ CurDAG->computeKnownBits(Op1, RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ // The AND mask might not be a constant, and we need to make sure that
+ // if we're going to fold the masking with the insert, all bits not
+        // known to be zero in the mask are known to be one.
+ APInt MKZ, MKO;
+ CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
+ bool CanFoldMask = InsertMask == MKO.getZExtValue();
+
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ // Note that Value must be in range here (less than 32) because
+ // otherwise there would not be any bits set in InsertMask.
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ }
+ }
+
+ SH &= 31;
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
+ }
+ }
+ return nullptr;
+}
+
+// Predict the number of instructions that would be generated by calling
+// SelectInt64(N).
+static unsigned SelectInt64CountDirect(int64_t Imm) {
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ unsigned Result = 0;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ ++Result;
+ } else if (Lo) {
+ // Handle the Hi bits and Lo bits.
+ Result += 2;
+ } else {
+ // Just the Hi bits.
+ ++Result;
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm)
+ ++Result;
+
+ // Add in the last bits as required.
+ if ((Remainder >> 16) & 0xFFFF)
+ ++Result;
+ if (Remainder & 0xFFFF)
+ ++Result;
+
+ return Result;
+}
+
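+// Rot64 - Rotate the 64-bit immediate Imm left by R bits (R is nonzero in the
+// callers below).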
+static uint64_t Rot64(uint64_t Imm, unsigned R) {
+ return (Imm << R) | (Imm >> (64 - R));
+}
+
+static unsigned SelectInt64Count(int64_t Imm) {
+ unsigned Count = SelectInt64CountDirect(Imm);
+ if (Count == 1)
+ return Count;
+
+ for (unsigned r = 1; r < 63; ++r) {
+ uint64_t RImm = Rot64(Imm, r);
+ unsigned RCount = SelectInt64CountDirect(RImm) + 1;
+ Count = std::min(Count, RCount);
+
+ // See comments in SelectInt64 for an explanation of the logic below.
+ unsigned LS = findLastSet(RImm);
+ if (LS != r-1)
+ continue;
+
+ uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
+ uint64_t RImmWithOnes = RImm | OnesMask;
+
+ RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
+ Count = std::min(Count, RCount);
+ }
+
+ return Count;
+}
+
+// Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count
+// (above) needs to be kept in sync with this function.
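+//
+// For example (illustrative): 0x0000000100000000 can be materialized with
+// LI8 1 followed by RLDICR ..., 32, 31, while a constant such as
+// 0x123456789ABCDEF0 generally needs the full LIS8/ORI8/RLDICR/ORIS8/ORI8
+// sequence of five instructions.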
+static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ };
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+}
+
+static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
+ unsigned Count = SelectInt64CountDirect(Imm);
+ if (Count == 1)
+ return SelectInt64Direct(CurDAG, dl, Imm);
+
+ unsigned RMin = 0;
+
+ int64_t MatImm;
+ unsigned MaskEnd;
+
+ for (unsigned r = 1; r < 63; ++r) {
+ uint64_t RImm = Rot64(Imm, r);
+ unsigned RCount = SelectInt64CountDirect(RImm) + 1;
+ if (RCount < Count) {
+ Count = RCount;
+ RMin = r;
+ MatImm = RImm;
+ MaskEnd = 63;
+ }
+
+ // If the immediate to generate has many trailing zeros, it might be
+ // worthwhile to generate a rotated value with too many leading ones
+ // (because that's free with li/lis's sign-extension semantics), and then
+ // mask them off after rotation.
+
+ unsigned LS = findLastSet(RImm);
+ // We're adding (63-LS) higher-order ones, and we expect to mask them off
+ // after performing the inverse rotation by (64-r). So we need that:
+ // 63-LS == 64-r => LS == r-1
+ if (LS != r-1)
+ continue;
+
+ uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
+ uint64_t RImmWithOnes = RImm | OnesMask;
+
+ RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
+ if (RCount < Count) {
+ Count = RCount;
+ RMin = r;
+ MatImm = RImmWithOnes;
+ MaskEnd = LS;
+ }
+ }
+
+ if (!RMin)
+ return SelectInt64Direct(CurDAG, dl, Imm);
+
+ auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ };
+
+ SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0);
+ return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
+ getI32Imm(64 - RMin), getI32Imm(MaskEnd));
+}
+
+// Select a 64-bit constant.
+static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) {
+ SDLoc dl(N);
+
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return SelectInt64(CurDAG, dl, Imm);
+}
+
+namespace {
+class BitPermutationSelector {
+ struct ValueBit {
+ SDValue V;
+
+ // The bit number in the value, using a convention where bit 0 is the
+ // lowest-order bit.
+ unsigned Idx;
+
+ enum Kind {
+ ConstZero,
+ Variable
+ } K;
+
+ ValueBit(SDValue V, unsigned I, Kind K = Variable)
+ : V(V), Idx(I), K(K) {}
+ ValueBit(Kind K = Variable)
+ : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
+
+ bool isZero() const {
+ return K == ConstZero;
+ }
+
+ bool hasValue() const {
+ return K == Variable;
+ }
+
+ SDValue getValue() const {
+ assert(hasValue() && "Cannot get the value of a constant bit");
+ return V;
+ }
+
+ unsigned getValueBitIndex() const {
+ assert(hasValue() && "Cannot get the value bit index of a constant bit");
+ return Idx;
+ }
+ };
+
+ // A bit group has the same underlying value and the same rotate factor.
+ struct BitGroup {
+ SDValue V;
+ unsigned RLAmt;
+ unsigned StartIdx, EndIdx;
+
+ // This rotation amount assumes that the lower 32 bits of the quantity are
+ // replicated in the high 32 bits by the rotation operator (which is done
+ // by rlwinm and friends in 64-bit mode).
+ bool Repl32;
+ // Did converting to Repl32 == true change the rotation factor? If it did,
+ // it decreased it by 32.
+ bool Repl32CR;
+ // Was this group coalesced after setting Repl32 to true?
+ bool Repl32Coalesced;
+
+ BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
+ : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
+ Repl32Coalesced(false) {
+ DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
+ " [" << S << ", " << E << "]\n");
+ }
+ };
+
+ // Information on each (Value, RLAmt) pair (like the number of groups
+ // associated with each) used to choose the lowering method.
+ struct ValueRotInfo {
+ SDValue V;
+ unsigned RLAmt;
+ unsigned NumGroups;
+ unsigned FirstGroupStartIdx;
+ bool Repl32;
+
+ ValueRotInfo()
+ : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
+ Repl32(false) {}
+
+ // For sorting (in reverse order) by NumGroups, and then by
+ // FirstGroupStartIdx.
+ bool operator < (const ValueRotInfo &Other) const {
+ // We need to sort so that the non-Repl32 come first because, when we're
+ // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
+ // masking operation.
+ if (Repl32 < Other.Repl32)
+ return true;
+ else if (Repl32 > Other.Repl32)
+ return false;
+ else if (NumGroups > Other.NumGroups)
+ return true;
+ else if (NumGroups < Other.NumGroups)
+ return false;
+ else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
+ return true;
+ return false;
+ }
+ };
+
+  // Return true if something interesting was deduced; return false if we're
+ // providing only a generic representation of V (or something else likewise
+ // uninteresting for instruction selection).
+ bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::ROTL:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ unsigned RotAmt = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt];
+
+ return true;
+ }
+ break;
+ case ISD::SHL:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ unsigned ShiftAmt = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = ShiftAmt; i < Bits.size(); ++i)
+ Bits[i] = LHSBits[i - ShiftAmt];
+
+ for (unsigned i = 0; i < ShiftAmt; ++i)
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+
+ return true;
+ }
+ break;
+ case ISD::SRL:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ unsigned ShiftAmt = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i)
+ Bits[i] = LHSBits[i + ShiftAmt];
+
+ for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i)
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+
+ return true;
+ }
+ break;
+ case ISD::AND:
+ if (isa<ConstantSDNode>(V.getOperand(1))) {
+ uint64_t Mask = V.getConstantOperandVal(1);
+
+ SmallVector<ValueBit, 64> LHSBits(Bits.size());
+ bool LHSTrivial = getValueBits(V.getOperand(0), LHSBits);
+
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ if (((Mask >> i) & 1) == 1)
+ Bits[i] = LHSBits[i];
+ else
+ Bits[i] = ValueBit(ValueBit::ConstZero);
+
+        // Mark this as interesting only if the LHS was also interesting. This
+ // prevents the overall procedure from matching a single immediate 'and'
+ // (which is non-optimal because such an and might be folded with other
+ // things if we don't select it here).
+ return LHSTrivial;
+ }
+ break;
+ case ISD::OR: {
+ SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size());
+ getValueBits(V.getOperand(0), LHSBits);
+ getValueBits(V.getOperand(1), RHSBits);
+
+ bool AllDisjoint = true;
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ if (LHSBits[i].isZero())
+ Bits[i] = RHSBits[i];
+ else if (RHSBits[i].isZero())
+ Bits[i] = LHSBits[i];
+ else {
+ AllDisjoint = false;
+ break;
+ }
+
+ if (!AllDisjoint)
+ break;
+
+ return true;
+ }
+ }
+
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ Bits[i] = ValueBit(V, i);
+
+ return false;
+ }
+
+ // For each value (except the constant ones), compute the left-rotate amount
+ // to get it from its original to final position.
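+  // For example (illustrative): if final bit i = 5 came from bit 3 of its
+  // source value, the left-rotate amount is 5 - 3 = 2; if final bit i = 1 came
+  // from bit 62 of a 64-bit value, the amount wraps to 64 - (62 - 1) = 3.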
+ void computeRotationAmounts() {
+ HasZeros = false;
+ RLAmt.resize(Bits.size());
+ for (unsigned i = 0; i < Bits.size(); ++i)
+ if (Bits[i].hasValue()) {
+ unsigned VBI = Bits[i].getValueBitIndex();
+ if (i >= VBI)
+ RLAmt[i] = i - VBI;
+ else
+ RLAmt[i] = Bits.size() - (VBI - i);
+ } else if (Bits[i].isZero()) {
+ HasZeros = true;
+ RLAmt[i] = UINT32_MAX;
+ } else {
+ llvm_unreachable("Unknown value bit type");
+ }
+ }
+
+ // Collect groups of consecutive bits with the same underlying value and
+ // rotation factor. If we're doing late masking, we ignore zeros, otherwise
+ // they break up groups.
+ void collectBitGroups(bool LateMask) {
+ BitGroups.clear();
+
+ unsigned LastRLAmt = RLAmt[0];
+ SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
+ unsigned LastGroupStartIdx = 0;
+ for (unsigned i = 1; i < Bits.size(); ++i) {
+ unsigned ThisRLAmt = RLAmt[i];
+ SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
+ if (LateMask && !ThisValue) {
+ ThisValue = LastValue;
+ ThisRLAmt = LastRLAmt;
+ // If we're doing late masking, then the first bit group always starts
+ // at zero (even if the first bits were zero).
+ if (BitGroups.empty())
+ LastGroupStartIdx = 0;
+ }
+
+ // If this bit has the same underlying value and the same rotate factor as
+ // the last one, then they're part of the same group.
+ if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
+ continue;
+
+ if (LastValue.getNode())
+ BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
+ i-1));
+ LastRLAmt = ThisRLAmt;
+ LastValue = ThisValue;
+ LastGroupStartIdx = i;
+ }
+ if (LastValue.getNode())
+ BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
+ Bits.size()-1));
+
+ if (BitGroups.empty())
+ return;
+
+ // We might be able to combine the first and last groups.
+ if (BitGroups.size() > 1) {
+ // If the first and last groups are the same, then remove the first group
+ // in favor of the last group, making the ending index of the last group
+ // equal to the ending index of the to-be-removed first group.
+ if (BitGroups[0].StartIdx == 0 &&
+ BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
+ BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
+ BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
+ DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
+ BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
+ BitGroups.erase(BitGroups.begin());
+ }
+ }
+ }
+
+ // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
+ // associated with each. If there is a degeneracy, pick the one that occurs
+ // first (in the final value).
+ void collectValueRotInfo() {
+ ValueRots.clear();
+
+ for (auto &BG : BitGroups) {
+ unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
+ ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
+ VRI.V = BG.V;
+ VRI.RLAmt = BG.RLAmt;
+ VRI.Repl32 = BG.Repl32;
+ VRI.NumGroups += 1;
+ VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
+ }
+
+ // Now that we've collected the various ValueRotInfo instances, we need to
+ // sort them.
+ ValueRotsVec.clear();
+ for (auto &I : ValueRots) {
+ ValueRotsVec.push_back(I.second);
+ }
+ std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
+ }
+
+  // In 64-bit mode, rlwinm and friends have a rotation operator that
+  // replicates the low-order 32 bits into the high-order 32 bits. The mask
+  // indices of these instructions can only be in the lower 32 bits, so they
+  // can only represent some 64-bit bit groups. However, when they can be used,
+  // the 32-bit replication can be used to represent, as a single bit group,
+  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
+  // groups (by setting Repl32 on the affected groups) when possible.
+ void assignRepl32BitGroups() {
+ // If we have bits like this:
+ //
+ // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
+ // Groups: | RLAmt = 8 | RLAmt = 40 |
+ //
+ // But, making use of a 32-bit operation that replicates the low-order 32
+ // bits into the high-order 32 bits, this can be one bit group with a RLAmt
+ // of 8.
+
+ auto IsAllLow32 = [this](BitGroup & BG) {
+ if (BG.StartIdx <= BG.EndIdx) {
+ for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
+ if (!Bits[i].hasValue())
+ continue;
+ if (Bits[i].getValueBitIndex() >= 32)
+ return false;
+ }
+ } else {
+ for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
+ if (!Bits[i].hasValue())
+ continue;
+ if (Bits[i].getValueBitIndex() >= 32)
+ return false;
+ }
+ for (unsigned i = 0; i <= BG.EndIdx; ++i) {
+ if (!Bits[i].hasValue())
+ continue;
+ if (Bits[i].getValueBitIndex() >= 32)
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ for (auto &BG : BitGroups) {
+ if (BG.StartIdx < 32 && BG.EndIdx < 32) {
+ if (IsAllLow32(BG)) {
+ if (BG.RLAmt >= 32) {
+ BG.RLAmt -= 32;
+ BG.Repl32CR = true;
+ }
+
+ BG.Repl32 = true;
+
+ DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
+ BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
+ " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
+ }
+ }
+ }
+
+ // Now walk through the bit groups, consolidating where possible.
+ for (auto I = BitGroups.begin(); I != BitGroups.end();) {
+ // We might want to remove this bit group by merging it with the previous
+ // group (which might be the ending group).
+ auto IP = (I == BitGroups.begin()) ?
+ std::prev(BitGroups.end()) : std::prev(I);
+ if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
+ I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
+
+ DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
+ I->V.getNode() << " RLAmt = " << I->RLAmt <<
+ " [" << I->StartIdx << ", " << I->EndIdx <<
+ "] with group with range [" <<
+ IP->StartIdx << ", " << IP->EndIdx << "]\n");
+
+ IP->EndIdx = I->EndIdx;
+ IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
+ IP->Repl32Coalesced = true;
+ I = BitGroups.erase(I);
+ continue;
+ } else {
+ // There is a special case worth handling: If there is a single group
+ // covering the entire upper 32 bits, and it can be merged with both
+ // the next and previous groups (which might be the same group), then
+ // do so. If it is the same group (so there will be only one group in
+ // total), then we need to reverse the order of the range so that it
+ // covers the entire 64 bits.
+ if (I->StartIdx == 32 && I->EndIdx == 63) {
+ assert(std::next(I) == BitGroups.end() &&
+ "bit group ends at index 63 but there is another?");
+ auto IN = BitGroups.begin();
+
+ if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
+ (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
+ IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
+ IsAllLow32(*I)) {
+
+ DEBUG(dbgs() << "\tcombining bit group for " <<
+ I->V.getNode() << " RLAmt = " << I->RLAmt <<
+ " [" << I->StartIdx << ", " << I->EndIdx <<
+ "] with 32-bit replicated groups with ranges [" <<
+ IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
+ IN->StartIdx << ", " << IN->EndIdx << "]\n");
+
+ if (IP == IN) {
+ // There is only one other group; change it to cover the whole
+ // range (backward, so that it can still be Repl32 but cover the
+ // whole 64-bit range).
+ IP->StartIdx = 31;
+ IP->EndIdx = 30;
+ IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
+ IP->Repl32Coalesced = true;
+ I = BitGroups.erase(I);
+ } else {
+ // There are two separate groups, one before this group and one
+ // after us (at the beginning). We're going to remove this group,
+ // but also the group at the very beginning.
+ IP->EndIdx = IN->EndIdx;
+ IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
+ IP->Repl32Coalesced = true;
+ I = BitGroups.erase(I);
+ BitGroups.erase(BitGroups.begin());
+ }
+
+ // This must be the last group in the vector (and we might have
+ // just invalidated the iterator above), so break here.
+ break;
+ }
+ }
+ }
+
+ ++I;
+ }
+ }
+
+ SDValue getI32Imm(unsigned Imm, SDLoc dl) {
+ return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+ }
+
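+  // Return a mask with a 1 in every bit position that carries a value (i.e.
+  // the complement of the positions known to be zero); used when masking the
+  // zeros in late.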
+ uint64_t getZerosMask() {
+ uint64_t Mask = 0;
+ for (unsigned i = 0; i < Bits.size(); ++i) {
+ if (Bits[i].hasValue())
+ continue;
+ Mask |= (UINT64_C(1) << i);
+ }
+
+ return ~Mask;
+ }
+
+ // Depending on the number of groups for a particular value, it might be
+ // better to rotate, mask explicitly (using andi/andis), and then or the
+ // result. Select this part of the result first.
+ void SelectAndParts32(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
+ if (BPermRewriterNoMasking)
+ return;
+
+ for (ValueRotInfo &VRI : ValueRotsVec) {
+ unsigned Mask = 0;
+ for (unsigned i = 0; i < Bits.size(); ++i) {
+ if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
+ continue;
+ if (RLAmt[i] != VRI.RLAmt)
+ continue;
+ Mask |= (1u << i);
+ }
+
+ // Compute the masks for andi/andis that would be necessary.
+ unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in mask for value bit groups");
+ bool NeedsRotate = VRI.RLAmt != 0;
+
+ // We're trying to minimize the number of instructions. If we have one
+ // group, using one of andi/andis can break even. If we have three
+ // groups, we can use both andi and andis and break even (to use both
+ // andi and andis we also need to or the results together). We need four
+ // groups if we also need to rotate. To use andi/andis we need to do more
+ // than break even because rotate-and-mask instructions tend to be easier
+ // to schedule.
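+      //
+      // For example (illustrative): a value with two bit groups that needs a
+      // rotate plus both an andi. and an andis. would cost 1 + 2 + 1 (the or)
+      // = 4 instructions via masking, versus 2 rotate-and-mask instructions,
+      // so masking is not used in that case.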
+
+ // FIXME: We've biased here against using andi/andis, which is right for
+ // POWER cores, but not optimal everywhere. For example, on the A2,
+ // andi/andis have single-cycle latency whereas the rotate-and-mask
+ // instructions take two cycles, and it would be better to bias toward
+ // andi/andis in break-even cases.
+
+ unsigned NumAndInsts = (unsigned) NeedsRotate +
+ (unsigned) (ANDIMask != 0) +
+ (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
+ (unsigned) (bool) Res;
+
+ DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
+ " RL: " << VRI.RLAmt << ":" <<
+ "\n\t\t\tisel using masking: " << NumAndInsts <<
+ " using rotates: " << VRI.NumGroups << "\n");
+
+ if (NumAndInsts >= VRI.NumGroups)
+ continue;
+
+ DEBUG(dbgs() << "\t\t\t\tusing masking\n");
+
+ if (InstCnt) *InstCnt += NumAndInsts;
+
+ SDValue VRot;
+ if (VRI.RLAmt) {
+ SDValue Ops[] =
+ { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
+ getI32Imm(31, dl) };
+ VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
+ Ops), 0);
+ } else {
+ VRot = VRI.V;
+ }
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
+ VRot, getI32Imm(ANDIMask, dl)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
+ VRot, getI32Imm(ANDISMask, dl)), 0);
+
+ SDValue TotalVal;
+ if (!ANDIVal)
+ TotalVal = ANDISVal;
+ else if (!ANDISVal)
+ TotalVal = ANDIVal;
+ else
+ TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
+ ANDIVal, ANDISVal), 0);
+
+ if (!Res)
+ Res = TotalVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
+ Res, TotalVal), 0);
+
+ // Now, remove all groups with this underlying value and rotation
+ // factor.
+ eraseMatchingBitGroups([VRI](const BitGroup &BG) {
+ return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
+ });
+ }
+ }
+
+ // Instruction selection for the 32-bit case.
+ SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
+ SDLoc dl(N);
+ SDValue Res;
+
+ if (InstCnt) *InstCnt = 0;
+
+ // Take care of cases that should use andi/andis first.
+ SelectAndParts32(dl, Res, InstCnt);
+
+ // If we've not yet selected a 'starting' instruction, and we have no zeros
+ // to fill in, select the (Value, RLAmt) with the highest priority (largest
+ // number of groups), and start with this rotated value.
+ if ((!HasZeros || LateMask) && !Res) {
+ ValueRotInfo &VRI = ValueRotsVec[0];
+ if (VRI.RLAmt) {
+ if (InstCnt) *InstCnt += 1;
+ SDValue Ops[] =
+ { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
+ getI32Imm(31, dl) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
+ 0);
+ } else {
+ Res = VRI.V;
+ }
+
+ // Now, remove all groups with this underlying value and rotation factor.
+ eraseMatchingBitGroups([VRI](const BitGroup &BG) {
+ return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
+ });
+ }
+
+ if (InstCnt) *InstCnt += BitGroups.size();
+
+ // Insert the other groups (one at a time).
+ for (auto &BG : BitGroups) {
+ if (!Res) {
+ SDValue Ops[] =
+ { BG.V, getI32Imm(BG.RLAmt, dl),
+ getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
+ getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ } else {
+ SDValue Ops[] =
+ { Res, BG.V, getI32Imm(BG.RLAmt, dl),
+ getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
+ getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
+ Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
+ }
+ }
+
+ if (LateMask) {
+ unsigned Mask = (unsigned) getZerosMask();
+
+ unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in zeros mask?");
+
+ if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
+ (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0);
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
+ Res, getI32Imm(ANDIMask, dl)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
+ Res, getI32Imm(ANDISMask, dl)), 0);
+
+ if (!ANDIVal)
+ Res = ANDISVal;
+ else if (!ANDISVal)
+ Res = ANDIVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
+ ANDIVal, ANDISVal), 0);
+ }
+
+ return Res.getNode();
+ }
+
+ unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
+ unsigned MaskStart, unsigned MaskEnd,
+ bool IsIns) {
+ // In the notation used by the instructions, 'start' and 'end' are reversed
+ // because bits are counted from high to low order.
+ unsigned InstMaskStart = 64 - MaskEnd - 1,
+ InstMaskEnd = 64 - MaskStart - 1;
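+ // For example, MaskStart = 8 and MaskEnd = 15 (counted from the LSB) give
+ // InstMaskStart = 48 and InstMaskEnd = 55 in the instruction's MSB-first
+ // numbering.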
+
+ if (Repl32)
+ return 1;
+
+ if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
+ InstMaskEnd == 63 - RLAmt)
+ return 1;
+
+ return 2;
+ }
+
+ // For 64-bit values, not all combinations of rotates and masks are
+ // available. Produce one if it is available.
+ SDValue SelectRotMask64(SDValue V, SDLoc dl, unsigned RLAmt, bool Repl32,
+ unsigned MaskStart, unsigned MaskEnd,
+ unsigned *InstCnt = nullptr) {
+ // In the notation used by the instructions, 'start' and 'end' are reversed
+ // because bits are counted from high to low order.
+ unsigned InstMaskStart = 64 - MaskEnd - 1,
+ InstMaskEnd = 64 - MaskStart - 1;
+
+ if (InstCnt) *InstCnt += 1;
+
+ if (Repl32) {
+ // This rotation amount assumes that the lower 32 bits of the quantity
+ // are replicated in the high 32 bits by the rotation operator (which is
+ // done by rlwinm and friends).
+ assert(InstMaskStart >= 32 && "Mask cannot start out of range");
+ assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
+ getI32Imm(InstMaskEnd - 32, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
+ Ops), 0);
+ }
+
+ if (InstMaskEnd == 63) {
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
+ }
+
+ if (InstMaskStart == 0) {
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
+ }
+
+ if (InstMaskEnd == 63 - RLAmt) {
+ SDValue Ops[] =
+ { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
+ }
+
+ // We cannot do this with a single instruction, so we'll use two. The
+ // problem is that we're not free to choose both a rotation amount and mask
+ // start and end independently. We can choose an arbitrary mask start and
+ // end, but then the rotation amount is fixed. Rotation, however, can be
+ // inverted, and so by applying an "inverse" rotation first, we can get the
+ // desired result.
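+ // For example, to realize RLAmt = 5 with MaskStart = 12, we take
+ // RLAmt2 = 12 and RLAmt1 = (64 + 5 - 12) % 64 = 57; rotating by 57 and
+ // then by 12 gives a net rotation of 69 mod 64 = 5, as required.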
+ if (InstCnt) *InstCnt += 1;
+
+ // The rotation amount for the second instruction must be MaskStart.
+ unsigned RLAmt2 = MaskStart;
+ // The first instruction must rotate V so that the overall rotation amount
+ // is RLAmt.
+ unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
+ if (RLAmt1)
+ V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
+ return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
+ }
+
+ // For 64-bit values, not all combinations of rotates and masks are
+ // available. Produce a rotate-mask-and-insert if one is available.
+ SDValue SelectRotMaskIns64(SDValue Base, SDValue V, SDLoc dl, unsigned RLAmt,
+ bool Repl32, unsigned MaskStart,
+ unsigned MaskEnd, unsigned *InstCnt = nullptr) {
+ // In the notation used by the instructions, 'start' and 'end' are reversed
+ // because bits are counted from high to low order.
+ unsigned InstMaskStart = 64 - MaskEnd - 1,
+ InstMaskEnd = 64 - MaskStart - 1;
+
+ if (InstCnt) *InstCnt += 1;
+
+ if (Repl32) {
+ // This rotation amount assumes that the lower 32 bits of the quantity
+ // are replicated in the high 32 bits by the rotation operator (which is
+ // done by rlwinm and friends).
+ assert(InstMaskStart >= 32 && "Mask cannot start out of range");
+ assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
+ SDValue Ops[] =
+ { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
+ getI32Imm(InstMaskEnd - 32, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
+ Ops), 0);
+ }
+
+ if (InstMaskEnd == 63 - RLAmt) {
+ SDValue Ops[] =
+ { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
+ }
+
+ // We cannot do this with a single instruction, so we'll use two. The
+ // problem is that we're not free to choose both a rotation amount and mask
+ // start and end independently. We can choose an arbitrary mask start and
+ // end, but then the rotation amount is fixed. Rotation, however, can be
+ // inverted, and so by applying an "inverse" rotation first, we can get the
+ // desired result.
+ if (InstCnt) *InstCnt += 1;
+
+ // The rotation amount for the second instruction must be MaskStart.
+ unsigned RLAmt2 = MaskStart;
+ // The first instruction must rotate V so that the overall rotation amount
+ // is RLAmt.
+ unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
+ if (RLAmt1)
+ V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
+ return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
+ }
+
+ void SelectAndParts64(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
+ if (BPermRewriterNoMasking)
+ return;
+
+ // The idea here is the same as in the 32-bit version, but with additional
+ // complications from the fact that Repl32 might be true. Because we
+ // aggressively convert bit groups to Repl32 form (which, for small
+ // rotation factors, involves no other change), and then coalesce, it might
+ // be the case that a single 64-bit masking operation could handle both
+ // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
+ // form allowed coalescing, then we must use a 32-bit rotation in order to
+ // completely capture the new combined bit group.
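+ // (MatchingBG below captures this: a Repl32 group that is confined to the
+ // low 32 bits and has not been coalesced matches a non-Repl32 request
+ // whose rotation amount equals BG.RLAmt, plus 32 when Repl32CR is set.)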
+
+ for (ValueRotInfo &VRI : ValueRotsVec) {
+ uint64_t Mask = 0;
+
+ // We need to add to the mask all bits from the associated bit groups.
+ // If Repl32 is false, we need to add bits from bit groups that have
+ // Repl32 true, but are trivially convertible to Repl32 false. Such a
+ // group is trivially convertible if it overlaps only with the lower 32
+ // bits, and the group has not been coalesced.
+ auto MatchingBG = [VRI](const BitGroup &BG) {
+ if (VRI.V != BG.V)
+ return false;
+
+ unsigned EffRLAmt = BG.RLAmt;
+ if (!VRI.Repl32 && BG.Repl32) {
+ if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
+ !BG.Repl32Coalesced) {
+ if (BG.Repl32CR)
+ EffRLAmt += 32;
+ } else {
+ return false;
+ }
+ } else if (VRI.Repl32 != BG.Repl32) {
+ return false;
+ }
+
+ return VRI.RLAmt == EffRLAmt;
+ };
+
+ for (auto &BG : BitGroups) {
+ if (!MatchingBG(BG))
+ continue;
+
+ if (BG.StartIdx <= BG.EndIdx) {
+ for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
+ Mask |= (UINT64_C(1) << i);
+ } else {
+ for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
+ Mask |= (UINT64_C(1) << i);
+ for (unsigned i = 0; i <= BG.EndIdx; ++i)
+ Mask |= (UINT64_C(1) << i);
+ }
+ }
+
+ // We can use the 32-bit andi/andis technique if the mask does not
+ // require any higher-order bits. This can save an instruction compared
+ // to always using the general 64-bit technique.
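+ // For example, a mask of 0xFFFF00FF fits in 32 bits, so it can be applied
+ // with andi + andis + or; a mask with any bit above bit 31 set would
+ // instead need the general constant-materialization sequence followed by
+ // an and.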
+ bool Use32BitInsts = isUInt<32>(Mask);
+ // Compute the masks for andi/andis that would be necessary.
+ unsigned ANDIMask = (Mask & UINT16_MAX),
+ ANDISMask = (Mask >> 16) & UINT16_MAX;
+
+ bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
+
+ unsigned NumAndInsts = (unsigned) NeedsRotate +
+ (unsigned) (bool) Res;
+ if (Use32BitInsts)
+ NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0);
+ else
+ NumAndInsts += SelectInt64Count(Mask) + /* and */ 1;
+
+ unsigned NumRLInsts = 0;
+ bool FirstBG = true;
+ for (auto &BG : BitGroups) {
+ if (!MatchingBG(BG))
+ continue;
+ NumRLInsts +=
+ SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
+ !FirstBG);
+ FirstBG = false;
+ }
+
+ DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
+ " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
+ "\n\t\t\tisel using masking: " << NumAndInsts <<
+ " using rotates: " << NumRLInsts << "\n");
+
+ // When we'd use andi/andis, we bias toward using the rotates (andi only
+ // has a record form, and is cracked on POWER cores). However, when using
+ // general 64-bit constant formation, bias toward the constant form,
+ // because that exposes more opportunities for CSE.
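+ // In other words, with andi/andis a tie (NumAndInsts == NumRLInsts) falls
+ // through to the rotates, while with the 64-bit constant form a tie
+ // selects the masking sequence.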
+ if (NumAndInsts > NumRLInsts)
+ continue;
+ if (Use32BitInsts && NumAndInsts == NumRLInsts)
+ continue;
+
+ DEBUG(dbgs() << "\t\t\t\tusing masking\n");
+
+ if (InstCnt) *InstCnt += NumAndInsts;
+
+ SDValue VRot;
+ // We actually need to generate a rotation if we have a non-zero rotation
+ // factor or, in the Repl32 case, if we care about any of the
+ // higher-order replicated bits. In the latter case, we generate a mask
+ // backward so that it actually includes the entire 64 bits.
+ if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
+ VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
+ VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
+ else
+ VRot = VRI.V;
+
+ SDValue TotalVal;
+ if (Use32BitInsts) {
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in mask when using 32-bit ands for 64-bit value");
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
+ VRot, getI32Imm(ANDIMask, dl)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
+ VRot, getI32Imm(ANDISMask, dl)), 0);
+
+ if (!ANDIVal)
+ TotalVal = ANDISVal;
+ else if (!ANDISVal)
+ TotalVal = ANDIVal;
+ else
+ TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
+ ANDIVal, ANDISVal), 0);
+ } else {
+ TotalVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
+ TotalVal =
+ SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
+ VRot, TotalVal), 0);
+ }
+
+ if (!Res)
+ Res = TotalVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
+ Res, TotalVal), 0);
+
+ // Now, remove all groups with this underlying value and rotation
+ // factor.
+ eraseMatchingBitGroups(MatchingBG);
+ }
+ }
+
+ // Instruction selection for the 64-bit case.
+ SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
+ SDLoc dl(N);
+ SDValue Res;
+
+ if (InstCnt) *InstCnt = 0;
+
+ // Take care of cases that should use andi/andis first.
+ SelectAndParts64(dl, Res, InstCnt);
+
+ // If we've not yet selected a 'starting' instruction, and we have no zeros
+ // to fill in, select the (Value, RLAmt) with the highest priority (largest
+ // number of groups), and start with this rotated value.
+ if ((!HasZeros || LateMask) && !Res) {
+ // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
+ // groups will come first, and so the VRI representing the largest number
+ // of groups might not be first (it might be the first of the Repl32 groups).
+ unsigned MaxGroupsIdx = 0;
+ if (!ValueRotsVec[0].Repl32) {
+ for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
+ if (ValueRotsVec[i].Repl32) {
+ if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
+ MaxGroupsIdx = i;
+ break;
+ }
+ }
+
+ ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
+ bool NeedsRotate = false;
+ if (VRI.RLAmt) {
+ NeedsRotate = true;
+ } else if (VRI.Repl32) {
+ for (auto &BG : BitGroups) {
+ if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
+ BG.Repl32 != VRI.Repl32)
+ continue;
+
+ // We don't need a rotate if the bit group is confined to the lower
+ // 32 bits.
+ if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
+ continue;
+
+ NeedsRotate = true;
+ break;
+ }
+ }
+
+ if (NeedsRotate)
+ Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
+ VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
+ InstCnt);
+ else
+ Res = VRI.V;
+
+ // Now, remove all groups with this underlying value and rotation factor.
+ if (Res)
+ eraseMatchingBitGroups([VRI](const BitGroup &BG) {
+ return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
+ BG.Repl32 == VRI.Repl32;
+ });
+ }
+
+ // Because 64-bit rotates are more flexible than inserts, we might have a
+ // preference regarding which one we do first (to save one instruction).
+ if (!Res)
+ for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
+ if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
+ false) <
+ SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
+ true)) {
+ if (I != BitGroups.begin()) {
+ BitGroup BG = *I;
+ BitGroups.erase(I);
+ BitGroups.insert(BitGroups.begin(), BG);
+ }
+
+ break;
+ }
+ }
+
+ // Insert the other groups (one at a time).
+ for (auto &BG : BitGroups) {
+ if (!Res)
+ Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
+ BG.EndIdx, InstCnt);
+ else
+ Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
+ BG.StartIdx, BG.EndIdx, InstCnt);
+ }
+
+ if (LateMask) {
+ uint64_t Mask = getZerosMask();
+
+ // We can use the 32-bit andi/andis technique if the mask does not
+ // require any higher-order bits. This can save an instruction compared
+ // to always using the general 64-bit technique.
+ bool Use32BitInsts = isUInt<32>(Mask);
+ // Compute the masks for andi/andis that would be necessary.
+ unsigned ANDIMask = (Mask & UINT16_MAX),
+ ANDISMask = (Mask >> 16) & UINT16_MAX;
+
+ if (Use32BitInsts) {
+ assert((ANDIMask != 0 || ANDISMask != 0) &&
+ "No set bits in mask when using 32-bit ands for 64-bit value");
+
+ if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
+ (unsigned) (ANDISMask != 0) +
+ (unsigned) (ANDIMask != 0 && ANDISMask != 0);
+
+ SDValue ANDIVal, ANDISVal;
+ if (ANDIMask != 0)
+ ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
+ Res, getI32Imm(ANDIMask, dl)), 0);
+ if (ANDISMask != 0)
+ ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
+ Res, getI32Imm(ANDISMask, dl)), 0);
+
+ if (!ANDIVal)
+ Res = ANDISVal;
+ else if (!ANDISVal)
+ Res = ANDIVal;
+ else
+ Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
+ ANDIVal, ANDISVal), 0);
+ } else {
+ if (InstCnt) *InstCnt += SelectInt64Count(Mask) + /* and */ 1;
+
+ SDValue MaskVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
+ Res =
+ SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
+ Res, MaskVal), 0);
+ }
+ }
+
+ return Res.getNode();
+ }
+
+ SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
+ // Fill in BitGroups.
+ collectBitGroups(LateMask);
+ if (BitGroups.empty())
+ return nullptr;
+
+ // For 64-bit values, figure out when we can use 32-bit instructions.
+ if (Bits.size() == 64)
+ assignRepl32BitGroups();
+
+ // Fill in ValueRotsVec.
+ collectValueRotInfo();
+
+ if (Bits.size() == 32) {
+ return Select32(N, LateMask, InstCnt);
+ } else {
+ assert(Bits.size() == 64 && "Not 64 bits here?");
+ return Select64(N, LateMask, InstCnt);
+ }
+
+ return nullptr;
+ }
+
+ void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
+ BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F),
+ BitGroups.end());
+ }
+
+ SmallVector<ValueBit, 64> Bits;
+
+ bool HasZeros;
+ SmallVector<unsigned, 64> RLAmt;
+
+ SmallVector<BitGroup, 16> BitGroups;
+
+ DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
+ SmallVector<ValueRotInfo, 16> ValueRotsVec;
+
+ SelectionDAG *CurDAG;
+
+public:
+ BitPermutationSelector(SelectionDAG *DAG)
+ : CurDAG(DAG) {}
+
+ // Here we try to match complex bit permutations into a set of
+ // rotate-and-shift/shift/and/or instructions, using a set of heuristics
+ // known to produce optimal code for common cases (like i32 byte swapping).
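+ // For example, a 32-bit byte swap is typically selected as one rlwinm
+ // followed by two rlwimi instructions.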
+ SDNode *Select(SDNode *N) {
+ Bits.resize(N->getValueType(0).getSizeInBits());
+ if (!getValueBits(SDValue(N, 0), Bits))
+ return nullptr;
+
+ DEBUG(dbgs() << "Considering bit-permutation-based instruction"
+ " selection for: ");
+ DEBUG(N->dump(CurDAG));
+
+ // Fill in RLAmt and set HasZeros.
+ computeRotationAmounts();
+
+ if (!HasZeros)
+ return Select(N, false);
+
+ // We currently have two techniques for handling results with zeros: early
+ // masking (the default) and late masking. Late masking is sometimes more
+ // efficient, but because the structure of the bit groups is different, it
+ // is hard to tell without generating both and comparing the results. With
+ // late masking, we ignore zeros in the resulting value when inserting each
+ // set of bit groups, and then mask in the zeros at the end. With early
+ // masking, we only insert the non-zero parts of the result at every step.
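+ // Both variants are generated below with instruction counting enabled,
+ // and the cheaper one is returned; ties go to early masking.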
+
+ unsigned InstCnt, InstCntLateMask;
+ DEBUG(dbgs() << "\tEarly masking:\n");
+ SDNode *RN = Select(N, false, &InstCnt);
+ DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
+
+ DEBUG(dbgs() << "\tLate masking:\n");
+ SDNode *RNLM = Select(N, true, &InstCntLateMask);
+ DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
+ " instructions\n");
+
+ if (InstCnt <= InstCntLateMask) {
+ DEBUG(dbgs() << "\tUsing early-masking for isel\n");
+ return RN;
+ }
+
+ DEBUG(dbgs() << "\tUsing late-masking for isel\n");
+ return RNLM;
+ }
+};
+} // anonymous namespace
+
+SDNode *PPCDAGToDAGISel::SelectBitPermutation(SDNode *N) {
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return nullptr;
+
+ if (!UseBitPermRewriter)
+ return nullptr;
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ROTL:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::AND:
+ case ISD::OR: {
+ BitPermutationSelector BPS(CurDAG);
+ return BPS.Select(N);
+ }
+ }
+
+ return nullptr;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>((int)Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF, dl)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF, dl)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF,
+ dl)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF, dl)),
+ 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt<32>(Imm)) {
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16, dl)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF, dl)),
+ 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF, dl)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF, dl)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ llvm_unreachable("Should be lowered by legalize!");
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
+ Invert = false;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ llvm_unreachable("Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+}
+
+// getVCmpInst: return the vector compare instruction for the specified
+// vector type and condition code. Since this is for Altivec- and VSX-specific
+// code, we only support the Altivec/VSX vector types (v16i8, v8i16, v4i32,
+// v2i64, v4f32, and v2f64).
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+ bool HasVSX, bool &Swap, bool &Negate) {
+ Swap = false;
+ Negate = false;
+
+ if (VecVT.isFloatingPoint()) {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+ case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+ case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid floating-point vector compare condition");
+ } else {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+ case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETUEQ:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPEQUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPEQUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPEQUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPEQUD;
+ break;
+ case ISD::SETGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTSB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTSH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTSW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTSD;
+ break;
+ case ISD::SETUGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTUD;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid integer vector compare condition");
+ }
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
+ SDLoc dl(N);
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT =
+ CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ if (!PPCSubTarget->useCRBits() &&
+ isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
+ Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
+ getI32Imm(31, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ SDValue AD =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U, dl)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(1, dl)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0, dl)),
+ 0), Op.getValue(1));
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U, dl));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1, dl)), 0);
+ SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
+ getI32Imm(31, dl) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1, dl));
+ }
+ }
+ }
+ }
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Altivec vector compare instructions do not set any CR register by default,
+ // and vector compare operations return the same type as the operands.
+ if (LHS.getValueType().isVector()) {
+ if (PPCSubTarget->hasQPX())
+ return nullptr;
+
+ EVT VecVT = LHS.getValueType();
+ bool Swap, Negate;
+ unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+ PPCSubTarget->hasVSX(), Swap, Negate);
+ if (Swap)
+ std::swap(LHS, RHS);
+
+ EVT ResVT = VecVT.changeVectorElementTypeToInteger();
+ if (Negate) {
+ SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+ PPC::VNOR,
+ ResVT, VCmp, VCmp);
+ }
+
+ return CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
+ }
+
+ if (PPCSubTarget->useCRBits())
+ return nullptr;
+
+ bool Inv;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
+ SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(nullptr, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+
+ SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
+ getI32Imm(31, dl), getI32Imm(31, dl) };
+ if (!Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
+}
+
+SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ return Result;
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
+ SDLoc dl(N);
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return nullptr; // Already selected.
+ }
+
+ // In case any misguided DAG-level optimizations form an ADD with a
+ // TargetConstant operand, crash here instead of miscompiling (by selecting
+ // an r+r add instead of some kind of r+i add).
+ if (N->getOpcode() == ISD::ADD &&
+ N->getOperand(1).getOpcode() == ISD::TargetConstant)
+ llvm_unreachable("Invalid ADD with TargetConstant operand");
+
+ // Try matching complex bit permutations before doing anything else.
+ if (SDNode *NN = SelectBitPermutation(N))
+ return NN;
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
+ if (N->getValueType(0) == MVT::i64)
+ return SelectInt64(CurDAG, N);
+ break;
+ }
+
+ case ISD::SETCC: {
+ SDNode *SN = SelectSETCC(N);
+ if (SN)
+ return SN;
+ break;
+ }
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex:
+ return getFrameIndex(N, N);
+
+ case PPCISD::MFOCRF: {
+ SDValue InFlag = N->getOperand(1);
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ }
+
+ case PPCISD::READ_TIME_BASE: {
+ return CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
+ MVT::Other, N->getOperand(0));
+ }
+
+ case PPCISD::SRA_ADDZE: {
+ SDValue N0 = N->getOperand(0);
+ SDValue ShiftAmt =
+ CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
+ getConstantIntValue(), dl,
+ N->getValueType(0));
+ if (N->getValueType(0) == MVT::i64) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
+ N0, ShiftAmt);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else {
+ assert(N->getValueType(0) == MVT::i32 &&
+ "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, ShiftAmt);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ }
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (Offset.getOpcode() == ISD::TargetConstant ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ return transferMemOperands(
+ N, CurDAG->getMachineNode(
+ Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
+ Ops));
+ } else {
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
+ case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
+ case MVT::f64: Opcode = PPC::LFDUX; break;
+ case MVT::f32: Opcode = PPC::LFSUX; break;
+ case MVT::i32: Opcode = PPC::LWZUX; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
+ "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDUX; break;
+ case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Base, Offset, Chain };
+ return transferMemOperands(
+ N, CurDAG->getMachineNode(
+ Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
+ Ops));
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+ uint64_t Imm64;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 64 - countTrailingOnes(Imm64);
+ SH = 0;
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
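+ // For example, (x >> 16) & 0xFFFF gives MB = 48, and the shift folds to
+ // SH = 64 - 16 = 48, producing rldicl x, 48, 48.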
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return nullptr;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
+ // bitfield insert.
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ // The idea here is to check whether this is equivalent to:
+ // (c1 & m) | (x & ~m)
+ // where m is a run-of-ones mask. The logic here is that, for each bit in
+ // c1 and c2:
+ // - if both are 1, then the output will be 1.
+ // - if both are 0, then the output will be 0.
+ // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
+ // come from x.
+ // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
+ // be 0.
+ // If that last condition is never the case, then we can form m from the
+ // bits that are the same between c1 and c2.
+ unsigned MB, ME;
+ if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR: {
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ short Imm;
+ if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
+ isIntS16Immediate(N->getOperand(1), Imm)) {
+ APInt LHSKnownZero, LHSKnownOne;
+ CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
+
+ // If this is equivalent to an add, then we can fold it with the
+ // FrameIndex calculation.
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL)
+ return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::ADD: {
+ short Imm;
+ if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
+ isIntS16Immediate(N->getOperand(1), Imm))
+ return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
+
+ break;
+ }
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ case PPCISD::ANDIo_1_EQ_BIT:
+ case PPCISD::ANDIo_1_GT_BIT: {
+ if (!ANDIGlueBug)
+ break;
+
+ EVT InVT = N->getOperand(0).getValueType();
+ assert((InVT == MVT::i64 || InVT == MVT::i32) &&
+ "Invalid input type for ANDIo_1_EQ_BIT");
+
+ unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
+ SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
+ N->getOperand(0),
+ CurDAG->getTargetConstant(1, dl, InVT)),
+ 0);
+ SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+ SDValue SRIdxVal =
+ CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
+ PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
+
+ return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1,
+ CR0Reg, SRIdxVal,
+ SDValue(AndI.getNode(), 1) /* glue */);
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT =
+ CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ // If this is a select of i1 operands, we'll pattern match it.
+ if (PPCSubTarget->useCRBits() &&
+ N->getOperand(0).getValueType() == MVT::i1)
+ break;
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U, dl));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+
+ if (N->getValueType(0) == MVT::i1) {
+ // An i1 select is: (c & t) | (!c & f).
+ bool Inv;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
+
+ unsigned SRI;
+ switch (Idx) {
+ default: llvm_unreachable("Invalid CC index");
+ case 0: SRI = PPC::sub_lt; break;
+ case 1: SRI = PPC::sub_gt; break;
+ case 2: SRI = PPC::sub_eq; break;
+ case 3: SRI = PPC::sub_un; break;
+ }
+
+ SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
+
+ SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
+ CCBit, CCBit), 0);
+ SDValue C = Inv ? NotCCBit : CCBit,
+ NotC = Inv ? CCBit : NotCCBit;
+
+ SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ C, N->getOperand(2)), 0);
+ SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ NotC, N->getOperand(3)), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
+ }
+
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ if (PPCSubTarget->hasP8Vector())
+ SelectCCOp = PPC::SELECT_CC_VSSRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ if (PPCSubTarget->hasVSX())
+ SelectCCOp = PPC::SELECT_CC_VSFRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ SelectCCOp = PPC::SELECT_CC_QFRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ SelectCCOp = PPC::SELECT_CC_QSRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ SelectCCOp = PPC::SELECT_CC_QBRC;
+ else if (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)
+ SelectCCOp = PPC::SELECT_CC_VSRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc, dl) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
+ }
+ case ISD::VSELECT:
+ if (PPCSubTarget->hasVSX()) {
+ SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
+ }
+
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
+ Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
+ unsigned DM[2];
+
+ for (int i = 0; i < 2; ++i)
+ if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
+ DM[i] = 0;
+ else
+ DM[i] = 1;
+
+ if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
+ Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isa<LoadSDNode>(Op1.getOperand(0))) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
+ SDValue Base, Offset;
+
+ if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
+ (LD->getMemoryVT() == MVT::f64 ||
+ LD->getMemoryVT() == MVT::i64) &&
+ SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
+ SDValue Chain = LD->getChain();
+ SDValue Ops[] = { Base, Offset, Chain };
+ return CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
+ }
+ }
+
+ // For little endian, we must swap the input operands and adjust
+ // the mask elements (reverse and invert them).
+ if (PPCSubTarget->isLittleEndian()) {
+ std::swap(Op1, Op2);
+ unsigned tmp = DM[0];
+ DM[0] = 1 - DM[1];
+ DM[1] = 1 - tmp;
+ }
+
+ SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
+ MVT::i32);
+ SDValue Ops[] = { Op1, Op2, DMV };
+ return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
+ }
+
+ break;
+ case PPCISD::BDNZ:
+ case PPCISD::BDZ: {
+ bool IsPPC64 = PPCSubTarget->isPPC64();
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
+ (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
+ MVT::Other, Ops);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
+
+ SDValue Pred = getI32Imm(PCC, dl);
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ unsigned PCC = getPredicateForSetCC(CC);
+
+ if (N->getOperand(2).getValueType() == MVT::i1) {
+ unsigned Opc;
+ bool Swap;
+ switch (PCC) {
+ default: llvm_unreachable("Unexpected Boolean-operand predicate");
+ case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
+ case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
+ case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
+ case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
+ case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
+ case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
+ }
+
+ SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
+ N->getOperand(Swap ? 3 : 2),
+ N->getOperand(Swap ? 2 : 3)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other,
+ BitComp, N->getOperand(4), N->getOperand(0));
+ }
+
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
+
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
+ }
+ case PPCISD::TOC_ENTRY: {
+ assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
+ "Only supported for 64-bit ABI and 32-bit SVR4");
+ if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
+ SDValue GA = N->getOperand(0);
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
+ MVT::i32, GA, N->getOperand(1)));
+ }
+
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ break;
+
+ // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
+ // If it must be toc-referenced according to PPCSubTarget, we generate:
+ // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+ SDValue GA = N->getOperand(0);
+ SDValue TOCbase = N->getOperand(1);
+ SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+ TOCbase, GA);
+
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
+ CModel == CodeModel::Large)
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
+ if (GVFlags & PPCII::MO_NLP_FLAG) {
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
+ }
+ }
+
+ return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+ SDValue(Tmp, 0), GA);
+ }
+ case PPCISD::PPC32_PICGOT: {
+ // Generate a PIC-safe GOT reference.
+ assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+ return CurDAG->SelectNodeTo(
+ N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()),
+ MVT::i32);
+ }
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1, dl);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16, dl);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16, dl);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16, dl);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16, dl);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+// If the target supports the cmpb instruction, do the idiom recognition here.
+// We don't do this as a DAG combine because we don't want to do it as nodes
+// are being combined (because we might miss part of the eventual idiom). We
+// don't want to do it during instruction selection because we want to reuse
+// the logic for lowering the masking operations that is already part of the
+// instruction selector.
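+//
+// As an illustrative example (not taken from the original comment), an i32
+// expression of the form
+//   or (select_cc (and (xor a, b), 0xff),   0, 0xff,   0, seteq),
+//      (select_cc (and (xor a, b), 0xff00), 0, 0xff00, 0, seteq), ...
+// yields 0xff in each byte lane where a and b agree, which is exactly what a
+// single (PPCISD::CMPB a, b) computes for those lanes.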
+SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
+ SDLoc dl(N);
+
+ assert(N->getOpcode() == ISD::OR &&
+ "Only OR nodes are supported for CMPB");
+
+ SDValue Res;
+ if (!PPCSubTarget->hasCMPB())
+ return Res;
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return Res;
+
+ EVT VT = N->getValueType(0);
+
+ SDValue RHS, LHS;
+ bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ uint64_t Mask = 0, Alt = 0;
+
+ auto IsByteSelectCC = [this](SDValue O, unsigned &b,
+ uint64_t &Mask, uint64_t &Alt,
+ SDValue &LHS, SDValue &RHS) {
+ if (O.getOpcode() != ISD::SELECT_CC)
+ return false;
+ ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
+
+ if (!isa<ConstantSDNode>(O.getOperand(2)) ||
+ !isa<ConstantSDNode>(O.getOperand(3)))
+ return false;
+
+ uint64_t PM = O.getConstantOperandVal(2);
+ uint64_t PAlt = O.getConstantOperandVal(3);
+ for (b = 0; b < 8; ++b) {
+ uint64_t Mask = UINT64_C(0xFF) << (8*b);
+ if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
+ break;
+ }
+
+ if (b == 8)
+ return false;
+ Mask |= PM;
+ Alt |= PAlt;
+
+ if (!isa<ConstantSDNode>(O.getOperand(1)) ||
+ O.getConstantOperandVal(1) != 0) {
+ SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
+ if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
+ Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
+ isa<ConstantSDNode>(Op0.getOperand(1))) {
+
+ unsigned Bits = Op0.getValueType().getSizeInBits();
+ if (b != Bits/8-1)
+ return false;
+ if (Op0.getConstantOperandVal(1) != Bits-8)
+ return false;
+
+ LHS = Op0.getOperand(0);
+ RHS = Op1.getOperand(0);
+ return true;
+ }
+
+ // When we have small integers (i16 to be specific), the form present
+ // post-legalization uses SETULT in the SELECT_CC for the
+ // higher-order byte, depending on the fact that the
+ // even-higher-order bytes are known to all be zero, for example:
+ // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
+ // (so when the second byte is the same, because all higher-order
+ // bits from bytes 3 and 4 are known to be zero, the result of the
+ // xor can be at most 255)
+ if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
+ isa<ConstantSDNode>(O.getOperand(1))) {
+
+ uint64_t ULim = O.getConstantOperandVal(1);
+ if (ULim != (UINT64_C(1) << b*8))
+ return false;
+
+ // Now we need to make sure that the upper bytes are known to be
+ // zero.
+ unsigned Bits = Op0.getValueType().getSizeInBits();
+ if (!CurDAG->MaskedValueIsZero(Op0,
+ APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
+ return false;
+
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1);
+ return true;
+ }
+
+ return false;
+ }
+
+ if (CC != ISD::SETEQ)
+ return false;
+
+ SDValue Op = O.getOperand(0);
+ if (Op.getOpcode() == ISD::AND) {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+ if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
+ return false;
+
+ SDValue XOR = Op.getOperand(0);
+ if (XOR.getOpcode() == ISD::TRUNCATE)
+ XOR = XOR.getOperand(0);
+ if (XOR.getOpcode() != ISD::XOR)
+ return false;
+
+ LHS = XOR.getOperand(0);
+ RHS = XOR.getOperand(1);
+ return true;
+ } else if (Op.getOpcode() == ISD::SRL) {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+ unsigned Bits = Op.getValueType().getSizeInBits();
+ if (b != Bits/8-1)
+ return false;
+ if (Op.getConstantOperandVal(1) != Bits-8)
+ return false;
+
+ SDValue XOR = Op.getOperand(0);
+ if (XOR.getOpcode() == ISD::TRUNCATE)
+ XOR = XOR.getOperand(0);
+ if (XOR.getOpcode() != ISD::XOR)
+ return false;
+
+ LHS = XOR.getOperand(0);
+ RHS = XOR.getOperand(1);
+ return true;
+ }
+
+ return false;
+ };
+
+ SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
+ while (!Queue.empty()) {
+ SDValue V = Queue.pop_back_val();
+
+ for (const SDValue &O : V.getNode()->ops()) {
+ unsigned b;
+ uint64_t M = 0, A = 0;
+ SDValue OLHS, ORHS;
+ if (O.getOpcode() == ISD::OR) {
+ Queue.push_back(O);
+ } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
+ if (!LHS) {
+ LHS = OLHS;
+ RHS = ORHS;
+ BytesFound[b] = true;
+ Mask |= M;
+ Alt |= A;
+ } else if ((LHS == ORHS && RHS == OLHS) ||
+ (RHS == ORHS && LHS == OLHS)) {
+ BytesFound[b] = true;
+ Mask |= M;
+ Alt |= A;
+ } else {
+ return Res;
+ }
+ } else {
+ return Res;
+ }
+ }
+ }
+
+ unsigned LastB = 0, BCnt = 0;
+ for (unsigned i = 0; i < 8; ++i)
+ if (BytesFound[LastB]) {
+ ++BCnt;
+ LastB = i;
+ }
+
+ if (!LastB || BCnt < 2)
+ return Res;
+
+ // Because we'll be zero-extending the output anyway if we don't have a
+ // specific value for each input byte (via the Mask), we can 'anyext' the
+ // inputs.
+ if (LHS.getValueType() != VT) {
+ LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
+ RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
+ }
+
+ Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
+
+ bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
+ if (NonTrivialMask && !Alt) {
+ // Res = Mask & CMPB
+ Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
+ CurDAG->getConstant(Mask, dl, VT));
+ } else if (Alt) {
+ // Res = (CMPB & Mask) | (~CMPB & Alt)
+ // Which, as suggested here:
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+ // can be written as:
+ // Res = Alt ^ ((Alt ^ Mask) & CMPB)
+ // which is useful because the (Alt ^ Mask) can be pre-computed.
+ Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
+ CurDAG->getConstant(Mask ^ Alt, dl, VT));
+ Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
+ CurDAG->getConstant(Alt, dl, VT));
+ }
+
+ return Res;
+}
+
+// When CR bit registers are enabled, an extension of an i1 variable to an i32
+// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
+// involves constant materialization of a 0 or a 1 or both. If the result of
+// the extension is then operated upon by some operator that can be constant
+// folded with a constant 0 or 1, and that constant can be materialized using
+// only one instruction (like a zero or one), then we should fold those
+// operations into the select.
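+//
+// As an illustrative example (assumed, not from the original comment),
+// (add (zext i1 %c to i32), 5) constant-folds to (select %c, 6, 5); both 6
+// and 5 are still signed-16-bit immediates, so no extra materialization code
+// is needed and the add disappears.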
+void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
+ if (!PPCSubTarget->useCRBits())
+ return;
+
+ if (N->getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOpcode() != ISD::ANY_EXTEND)
+ return;
+
+ if (N->getOperand(0).getValueType() != MVT::i1)
+ return;
+
+ if (!N->hasOneUse())
+ return;
+
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Cond = N->getOperand(0);
+ SDValue ConstTrue =
+ CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
+ SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
+
+ do {
+ SDNode *User = *N->use_begin();
+ if (User->getNumOperands() != 2)
+ break;
+
+ auto TryFold = [this, N, User, dl](SDValue Val) {
+ SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
+ SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
+ SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
+
+ return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
+ User->getValueType(0),
+ O0.getNode(), O1.getNode());
+ };
+
+ SDValue TrueRes = TryFold(ConstTrue);
+ if (!TrueRes)
+ break;
+ SDValue FalseRes = TryFold(ConstFalse);
+ if (!FalseRes)
+ break;
+
+ // For us to materialize these using one instruction, we must be able to
+ // represent them as signed 16-bit integers.
+ uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
+ False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
+ if (!isInt<16>(True) || !isInt<16>(False))
+ break;
+
+ // We can replace User with a new SELECT node, and try again to see if we
+ // can fold the select with its user.
+ Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
+ N = User;
+ ConstTrue = TrueRes;
+ ConstFalse = FalseRes;
+ } while (N->hasOneUse());
+}
+
+void PPCDAGToDAGISel::PreprocessISelDAG() {
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ if (N->use_empty())
+ continue;
+
+ SDValue Res;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::OR:
+ Res = combineToCMPB(N);
+ break;
+ }
+
+ if (!Res)
+ foldBoolExts(Res, N);
+
+ if (Res) {
+ DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(Res.getNode()->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ MadeChange = true;
+ }
+ }
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
+}
+
+/// PostprocessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ PeepholePPC64();
+ PeepholeCROps();
+ PeepholePPC64ZExt();
+}
+
+// Check if all users of this node will become isel instructions in which the
+// second operand is the constant zero. If so, and if we can negate the
+// condition, then we can flip the true and false operands. This allows the
+// zero to be folded into the isel so that we don't need to materialize a
+// register containing zero.
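+//
+// Illustrative example (assumed): isel can encode a zero only in its first
+// source operand (rA = 0 reads as the constant zero), so with a negated
+// condition (select_i4 cond, %x, 0) becomes (select_i4 !cond, 0, %x),
+// letting the eventual isel use the constant zero directly instead of a
+// register loaded with li 0.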
+bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
+ // If we're not using isel, then this does not matter.
+ if (!PPCSubTarget->hasISEL())
+ return false;
+
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (!User->isMachineOpcode())
+ return false;
+ if (User->getMachineOpcode() != PPC::SELECT_I4 &&
+ User->getMachineOpcode() != PPC::SELECT_I8)
+ return false;
+
+ SDNode *Op2 = User->getOperand(2).getNode();
+ if (!Op2->isMachineOpcode())
+ return false;
+
+ if (Op2->getMachineOpcode() != PPC::LI &&
+ Op2->getMachineOpcode() != PPC::LI8)
+ return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
+ if (!C)
+ return false;
+
+ if (!C->isNullValue())
+ return false;
+ }
+
+ return true;
+}
+
+void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
+ SmallVector<SDNode *, 4> ToReplace;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
+ User->getMachineOpcode() == PPC::SELECT_I8) &&
+ "Must have all select users");
+ ToReplace.push_back(User);
+ }
+
+ for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
+ UE = ToReplace.end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ SDNode *ResNode =
+ CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
+ User->getValueType(0), User->getOperand(0),
+ User->getOperand(2),
+ User->getOperand(1));
+
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(User->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(User, ResNode);
+ }
+}
+
+void PPCDAGToDAGISel::PeepholeCROps() {
+ bool IsModified;
+ do {
+ IsModified = false;
+ for (SDNode &Node : CurDAG->allnodes()) {
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
+ if (!MachineNode || MachineNode->use_empty())
+ continue;
+ SDNode *ResNode = MachineNode;
+
+ bool Op1Set = false, Op1Unset = false,
+ Op1Not = false,
+ Op2Set = false, Op2Unset = false,
+ Op2Not = false;
+
+ unsigned Opcode = MachineNode->getMachineOpcode();
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ case PPC::CRNAND:
+ case PPC::CROR:
+ case PPC::CRXOR:
+ case PPC::CRNOR:
+ case PPC::CREQV:
+ case PPC::CRANDC:
+ case PPC::CRORC: {
+ SDValue Op = MachineNode->getOperand(1);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op2Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op2Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op2Not = true;
+ }
+ } // fallthrough
+ case PPC::BC:
+ case PPC::BCn:
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
+ case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSSRC:
+ case PPC::SELECT_VSRC: {
+ SDValue Op = MachineNode->getOperand(0);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op1Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op1Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op1Not = true;
+ }
+ }
+ break;
+ }
+
+ bool SelectSwap = false;
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x & x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set)
+ // 1 & y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // x & 1 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset || Op2Unset)
+ // x & 0 = 0 & y = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // ~x & y = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x & ~y = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // nand(x, x) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Set)
+ // nand(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // nand(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset || Op2Unset)
+ // nand(x, 0) = nand(0, y) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nand(x, ~y) = ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CROR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x | x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set || Op2Set)
+ // x | 1 = 1 | y = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // 0 | y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // x | 0 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRXOR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // xor(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // xor(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // xor(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset)
+ // xor(0, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // xor(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // xor(~x, y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // xor(x, ~y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNOR:
+ if (Op1Set || Op2Set)
+ // nor(1, y) -> 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // nor(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // nor(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // nor(~x, y) = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nor(x, ~y) = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CREQV:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // eqv(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // eqv(1, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // eqv(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // eqv(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // eqv(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // eqv(~x, y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // eqv(x, ~y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRANDC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // andc(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // andc(1, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Unset || Op2Set)
+ // andc(0, y) = andc(x, 1) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Unset)
+ // andc(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // andc(~x, y) = ~(x | y) = nor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // andc(x, ~y) = x & y
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::CRORC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // orc(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set || Op2Unset)
+ // orc(1, y) = orc(x, 0) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Set)
+ // orc(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // orc(0, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Not)
+ // orc(~x, y) = ~(x & y) = nand(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // orc(x, ~y) = x | y
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
+ case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSSRC:
+ case PPC::SELECT_VSRC:
+ if (Op1Set)
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op1Unset)
+ ResNode = MachineNode->getOperand(2).getNode();
+ else if (Op1Not)
+ ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
+ SDLoc(MachineNode),
+ MachineNode->getValueType(0),
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(2),
+ MachineNode->getOperand(1));
+ break;
+ case PPC::BC:
+ case PPC::BCn:
+ if (Op1Not)
+ ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
+ PPC::BC,
+ SDLoc(MachineNode),
+ MVT::Other,
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1),
+ MachineNode->getOperand(2));
+ // FIXME: Handle Op1Set, Op1Unset here too.
+ break;
+ }
+
+ // If we're inverting this node because it is used only by selects that
+ // we'd like to swap, then swap the selects before the node replacement.
+ if (SelectSwap)
+ SwapAllSelectUsers(MachineNode);
+
+ if (ResNode != MachineNode) {
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(MachineNode->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(MachineNode, ResNode);
+ IsModified = true;
+ }
+ }
+ if (IsModified)
+ CurDAG->RemoveDeadNodes();
+ } while (IsModified);
+}
+
+// Gather the set of 32-bit operations that are known to have their
+// higher-order 32 bits zero, where ToPromote contains all such operations.
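+//
+// For example (illustrative), given (or (slw a, s), (li 17)) both operands
+// are known to leave the upper 32 bits clear, so the OR (and, transitively,
+// the SLW and LI) can all be promoted to their 64-bit forms.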
+static bool PeepholePPC64ZExtGather(SDValue Op32,
+ SmallPtrSetImpl<SDNode *> &ToPromote) {
+ if (!Op32.isMachineOpcode())
+ return false;
+
+ // First, check for the "frontier" instructions (those that will clear the
+ // higher-order 32 bits).
+
+ // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
+ // around. If it does not, then these instructions will clear the
+ // higher-order bits.
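+ // For instance (illustrative), rlwinm 3, 4, 5, 16, 31 has MB <= ME, so its
+ // 64-bit result is known to have the upper 32 bits clear, whereas a
+ // wrapping mask such as MB = 28, ME = 3 could leave them set.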
+ if ((Op32.getMachineOpcode() == PPC::RLWINM ||
+ Op32.getMachineOpcode() == PPC::RLWNM) &&
+ Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // SLW and SRW always clear the higher-order bits.
+ if (Op32.getMachineOpcode() == PPC::SLW ||
+ Op32.getMachineOpcode() == PPC::SRW) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // For LI and LIS, we need the immediate to be positive (so that it is not
+ // sign extended).
+ if (Op32.getMachineOpcode() == PPC::LI ||
+ Op32.getMachineOpcode() == PPC::LIS) {
+ if (!isUInt<15>(Op32.getConstantOperandVal(0)))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // LHBRX and LWBRX always clear the higher-order bits.
+ if (Op32.getMachineOpcode() == PPC::LHBRX ||
+ Op32.getMachineOpcode() == PPC::LWBRX) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
+ if (Op32.getMachineOpcode() == PPC::CNTLZW) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // Next, check for those instructions we can look through.
+
+ // Assuming the mask does not wrap around, then the higher-order bits are
+ // taken directly from the first operand.
+ if (Op32.getMachineOpcode() == PPC::RLWIMI &&
+ Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+ return true;
+ }
+
+ // For OR, the higher-order bits are zero if that is true for both operands.
+ // For SELECT_I4, the same is true (but the relevant operand numbers are
+ // shifted by 1).
+ if (Op32.getMachineOpcode() == PPC::OR ||
+ Op32.getMachineOpcode() == PPC::SELECT_I4) {
+ unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
+ return false;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+ return true;
+ }
+
+ // For ORI and ORIS, we need the higher-order bits of the first operand to be
+ // zero, and also for the constant to be positive (so that it is not sign
+ // extended).
+ if (Op32.getMachineOpcode() == PPC::ORI ||
+ Op32.getMachineOpcode() == PPC::ORIS) {
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
+ return false;
+ if (!isUInt<15>(Op32.getConstantOperandVal(1)))
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+ return true;
+ }
+
+ // The higher-order bits of AND are zero if that is true for at least one of
+ // the operands.
+ if (Op32.getMachineOpcode() == PPC::AND) {
+ SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
+ bool Op0OK =
+ PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
+ bool Op1OK =
+ PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
+ if (!Op0OK && !Op1OK)
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+
+ if (Op0OK)
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+
+ if (Op1OK)
+ ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
+
+ return true;
+ }
+
+ // For ANDI and ANDIS, the higher-order bits are zero if either that is true
+ // of the first operand, or if the second operand is positive (so that it is
+ // not sign extended).
+ if (Op32.getMachineOpcode() == PPC::ANDIo ||
+ Op32.getMachineOpcode() == PPC::ANDISo) {
+ SmallPtrSet<SDNode *, 16> ToPromote1;
+ bool Op0OK =
+ PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
+ bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
+ if (!Op0OK && !Op1OK)
+ return false;
+
+ ToPromote.insert(Op32.getNode());
+
+ if (Op0OK)
+ ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
+
+ return true;
+ }
+
+ return false;
+}
+
+void PPCDAGToDAGISel::PeepholePPC64ZExt() {
+ if (!PPCSubTarget->isPPC64())
+ return;
+
+ // When we zero-extend from i32 to i64, we use a pattern like this:
+ // def : Pat<(i64 (zext i32:$in)),
+ // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
+ // 0, 32)>;
+ // There are several 32-bit shift/rotate instructions, however, that will
+ // clear the higher-order bits of their output, rendering the RLDICL
+ // unnecessary. When that happens, we remove it here, and redefine the
+ // relevant 32-bit operation to be a 64-bit operation.
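+ //
+ // For example (illustrative), if $in comes from an SLW (which always clears
+ // the upper 32 bits), the INSERT_SUBREG/RLDICL pair can be dropped and the
+ // SLW is simply rewritten as SLW8, yielding the i64 result directly.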
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ if (N->getMachineOpcode() != PPC::RLDICL)
+ continue;
+
+ if (N->getConstantOperandVal(1) != 0 ||
+ N->getConstantOperandVal(2) != 32)
+ continue;
+
+ SDValue ISR = N->getOperand(0);
+ if (!ISR.isMachineOpcode() ||
+ ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
+ continue;
+
+ if (!ISR.hasOneUse())
+ continue;
+
+ if (ISR.getConstantOperandVal(2) != PPC::sub_32)
+ continue;
+
+ SDValue IDef = ISR.getOperand(0);
+ if (!IDef.isMachineOpcode() ||
+ IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
+ continue;
+
+ // We now know that we're looking at a canonical i32 -> i64 zext. See if we
+ // can get rid of it.
+
+ SDValue Op32 = ISR->getOperand(1);
+ if (!Op32.isMachineOpcode())
+ continue;
+
+ // There are some 32-bit instructions that always clear the high-order 32
+ // bits, and there are also some instructions (like AND) that we can look
+ // through.
+ SmallPtrSet<SDNode *, 16> ToPromote;
+ if (!PeepholePPC64ZExtGather(Op32, ToPromote))
+ continue;
+
+ // If the ToPromote set contains nodes that have uses outside of the set
+ // (except for the original INSERT_SUBREG), then abort the transformation.
+ bool OutsideUse = false;
+ for (SDNode *PN : ToPromote) {
+ for (SDNode *UN : PN->uses()) {
+ if (!ToPromote.count(UN) && UN != ISR.getNode()) {
+ OutsideUse = true;
+ break;
+ }
+ }
+
+ if (OutsideUse)
+ break;
+ }
+ if (OutsideUse)
+ continue;
+
+ MadeChange = true;
+
+ // We now know that this zero extension can be removed by promoting the
+ // nodes in ToPromote to 64-bit operations, where for operations in the
+ // frontier of the set, we need to insert INSERT_SUBREGs for their
+ // operands.
+ for (SDNode *PN : ToPromote) {
+ unsigned NewOpcode;
+ switch (PN->getMachineOpcode()) {
+ default:
+ llvm_unreachable("Don't know the 64-bit variant of this instruction");
+ case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
+ case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
+ case PPC::SLW: NewOpcode = PPC::SLW8; break;
+ case PPC::SRW: NewOpcode = PPC::SRW8; break;
+ case PPC::LI: NewOpcode = PPC::LI8; break;
+ case PPC::LIS: NewOpcode = PPC::LIS8; break;
+ case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
+ case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
+ case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
+ case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
+ case PPC::OR: NewOpcode = PPC::OR8; break;
+ case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
+ case PPC::ORI: NewOpcode = PPC::ORI8; break;
+ case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
+ case PPC::AND: NewOpcode = PPC::AND8; break;
+ case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
+ case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
+ }
+
+ // Note: During the replacement process, the nodes will be in an
+ // inconsistent state (some instructions will have operands with values
+ // of the wrong type). Once done, however, everything should be right
+ // again.
+
+ SmallVector<SDValue, 4> Ops;
+ for (const SDValue &V : PN->ops()) {
+ if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
+ !isa<ConstantSDNode>(V)) {
+ SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
+ SDNode *ReplOp =
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
+ ISR.getNode()->getVTList(), ReplOpOps);
+ Ops.push_back(SDValue(ReplOp, 0));
+ } else {
+ Ops.push_back(V);
+ }
+ }
+
+ // Because all to-be-promoted nodes only have users that are other
+ // promoted nodes (or the original INSERT_SUBREG), we can safely replace
+ // the i32 result value type with i64.
+
+ SmallVector<EVT, 2> NewVTs;
+ SDVTList VTs = PN->getVTList();
+ for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
+ if (VTs.VTs[i] == MVT::i32)
+ NewVTs.push_back(MVT::i64);
+ else
+ NewVTs.push_back(VTs.VTs[i]);
+
+ DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
+ DEBUG(PN->dump(CurDAG));
+
+ CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
+
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(PN->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+ }
+
+ // Now we replace the original zero extend and its associated INSERT_SUBREG
+ // with the value feeding the INSERT_SUBREG (which has now been promoted to
+ // return an i64).
+
+ DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(Op32.getNode()->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(N, Op32.getNode());
+ }
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
+}
+
+void PPCDAGToDAGISel::PeepholePPC64() {
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, or within the alignment,
+ // we may be able to fold an add-immediate into the memory operation.
+ // The check against alignment is done below, as it cannot happen until we
+ // have examined the operands of N.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ // On targets with fusion, we don't want this to fire and remove a fusion
+ // opportunity, unless a) it results in another fusion opportunity or
+ // b) we are optimizing for size.
+ if (PPCSubTarget->hasFusion() &&
+ (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load, though, can receive
+ // relocation from various flavors of ADDIxxx, so we need to carry
+ // the relocation information in the target flags.
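+ //
+ // Illustratively (not from the original comment), folding an ADDItocL that
+ // feeds an LD turns
+ //   addis 3, 2, sym@toc@ha ; addi 3, 3, sym@toc@l ; ld 4, 0(3)
+ // into
+ //   addis 3, 2, sym@toc@ha ; ld 4, sym@toc@l(3)
+ // with MO_TOC_LO carried on the folded immediate operand.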
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC_LO;
+ break;
+ }
+
+ SDValue ImmOpnd = Base.getOperand(1);
+ int MaxDisplacement = 0;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ const GlobalValue *GV = GA->getGlobal();
+ MaxDisplacement = GV->getAlignment() - 1;
+ }
+
+ int Offset = N->getConstantOperandVal(FirstOp);
+ if (Offset < 0 || Offset > MaxDisplacement)
+ continue;
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ SDLoc dl(GA);
+ const GlobalValue *GV = GA->getGlobal();
+ // We can't perform this optimization for data whose alignment
+ // is insufficient for the instruction encoding.
+ if (GV->getAlignment() < 4 &&
+ (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
+ StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
+ DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
+ continue;
+ }
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ Offset, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID,
+ nullptr, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 0000000..af9ad07
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,11590 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPerfectShuffle.h"
+#include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
+cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
+// FIXME: Remove this once the bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
+
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+ // arguments are at least 4/8 bytes aligned.
+ bool isPPC64 = Subtarget.isPPC64();
+ setMinStackArgumentAlignment(isPPC64 ? 8:4);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
+ if (!Subtarget.useSoftFloat()) {
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ }
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ }
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc load and stores.
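+ // (These map onto the update forms of the memory instructions, e.g.
+ // lwzu/stwu, via the pre-increment indexed load/store hooks; this is an
+ // illustrative note, not part of the original comment.)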
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
+
+ if (Subtarget.useCRBits()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (isPPC64 || Subtarget.hasFPCVT()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+ }
+
+ // PowerPC does not support direct load / store of condition registers
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ if (ANDIGlueBug)
+ setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
+
+ addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+ }
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // If we're enabling GP optimizations, use hardware square root
+ if (!Subtarget.hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
+ Subtarget.hasFRE()))
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget.hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
+ Subtarget.hasFRES()))
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ if (Subtarget.hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
+
+ if (Subtarget.hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ }
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+
+ if (Subtarget.hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ if (!Subtarget.useCRBits()) {
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ if (Subtarget.hasDirectMove()) {
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+ } else {
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+ }
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuation, user-level threading, etc. As a result, no
+ // other SjLj exception interfaces are implemented, and please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ if (Subtarget.isSVR4ABI()) {
+ if (isPPC64) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ }
+ } else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ if (Subtarget.isSVR4ABI() && !isPPC64)
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // To handle counter-based loop conditions.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (Subtarget.has64BitSupport()) {
+ // They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (Subtarget.hasFPCVT()) {
+ if (Subtarget.has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
+ if (Subtarget.use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (Subtarget.hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (MVT VT : MVT::vector_valuetypes()) {
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+
+ // Vector instructions introduced in P8
+ if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
+ setOperationAction(ISD::CTPOP, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+      } else {
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT_CC, VT, Promote);
+ AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32,
+ Subtarget.useCRBits() ? Legal : Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ else
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ // Altivec does not contain unordered floating-point compare instructions
+ setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+ if (Subtarget.hasP8Vector()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+ }
+ if (Subtarget.hasDirectMove()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STORE, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+
+ if (Subtarget.hasP8Vector())
+ addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
+
+ addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
+
+ addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+      } else {
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ }
+
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
+ // Vector operation legalization checks the result type of
+ // SIGN_EXTEND_INREG, overall legalization checks the inner type.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+
+ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
+ }
+
+ if (Subtarget.hasP8Altivec()) {
+ addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
+ }
+ }
+
+ if (Subtarget.hasQPX()) {
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f64, Custom);
+
+ setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
+
+ addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f32, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
+
+ setOperationAction(ISD::AND , MVT::v4i1, Legal);
+ setOperationAction(ISD::OR , MVT::v4i1, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i1, Legal);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
+
+ setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
+ setOperationAction(ISD::STORE , MVT::v4i1, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
+
+ addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+
+ // These need to set FE_INEXACT, and so cannot be vectorized here.
+ setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ } else {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ }
+ }
+
+ if (Subtarget.has64BitSupport())
+ setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
+
+ if (!isPPC64) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ }
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ if (Subtarget.hasAltivec()) {
+ // Altivec instructions set fields to all zeros or all ones.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ }
+
+ if (!isPPC64) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
+
+ setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ if (Subtarget.hasFPCVT())
+ setTargetDAGCombine(ISD::UINT_TO_FP);
+ setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ if (Subtarget.useCRBits())
+ setTargetDAGCombine(ISD::BRCOND);
+ setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ if (Subtarget.useCRBits()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ }
+
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (Subtarget.isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ // With 32 condition bits, we don't need to sink (and duplicate) compares
+ // aggressively in CodeGenPrep.
+ if (Subtarget.useCRBits()) {
+ setHasMultipleConditionRegisters();
+ setJumpIsExpensive();
+ }
+
+ setMinFunctionAlignment(2);
+ if (Subtarget.isDarwin())
+ setPrefFunctionAlignment(4);
+
+ switch (Subtarget.getDarwinDirective()) {
+ default: break;
+ case PPC::DIR_970:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ setPrefFunctionAlignment(4);
+ setPrefLoopAlignment(4);
+ break;
+ }
+
+ setInsertFencesForAtomic(true);
+
+ if (Subtarget.enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ computeRegisterProperties(STI.getRegisterInfo());
+
+ // The Freescale cores do better with aggressive inlining of memcpy and
+  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
+ } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 also benefits from (very) aggressive inlining of memcpy and
+    // friends. The overhead of a function call, even when warm, can be
+ // over one hundred cycles.
+ MaxStoresPerMemset = 128;
+ MaxStoresPerMemcpy = 128;
+ MaxStoresPerMemmove = 128;
+ }
+}
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (auto *EltTy : STy->elements()) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
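+///
+/// Illustrative examples, derived from the logic below (not normative): on a
+/// non-Darwin Altivec/VSX target, an aggregate containing a <4 x i32> member
+/// is placed on a 16-byte boundary; on a QPX target a <4 x double> (256-bit)
+/// member raises the alignment to 32 bytes; scalar-only aggregates stay at
+/// 8 bytes on PPC64 and 4 bytes on PPC32.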
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
+  // Darwin passes everything on a 4-byte boundary.
+ if (Subtarget.isDarwin())
+ return 4;
+
+  // 16-byte and wider vectors are passed on a 16-byte boundary.
+  // Everything else is passed on an 8-byte boundary on PPC64 and a 4-byte
+  // boundary on PPC32.
+ unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
+ return Align;
+}
+
+bool PPCTargetLowering::useSoftFloat() const {
+ return Subtarget.useSoftFloat();
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((PPCISD::NodeType)Opcode) {
+ case PPCISD::FIRST_NUMBER: break;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
+ case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
+ case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
+ case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::CMPB: return "PPCISD::CMPB";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
+ case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
+ case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+ case PPCISD::MFVSR: return "PPCISD::MFVSR";
+ case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
+ case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
+ case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
+ case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
+ case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
+ case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
+ case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::BDNZ: return "PPCISD::BDNZ";
+ case PPCISD::BDZ: return "PPCISD::BDZ";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
+ case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
+ case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
+ case PPCISD::SC: return "PPCISD::SC";
+ case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
+ case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
+ case PPCISD::RFEBB: return "PPCISD::RFEBB";
+ case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
+ case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
+ case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
+ case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
+ case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
+ case PPCISD::QBFLT: return "PPCISD::QBFLT";
+ case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
+ }
+ return nullptr;
+}
+
+EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
+ EVT VT) const {
+ if (!VT.isVector())
+ return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+
+ if (Subtarget.hasQPX())
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+
+ return VT.changeVectorElementTypeToInteger();
+}
+
+bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Return true if the shuffle mask element Op is undef
+/// (encoded as a negative value) or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
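+/// For illustration (as encoded by the checks below): a big-endian two-input
+/// vpkuhum (ShuffleKind 0) expects the mask <1,3,5,...,31>, i.e. the
+/// odd-numbered bytes of the two concatenated inputs, which selects the
+/// low-order byte of every halfword.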
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 1;
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 2;
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
+/// current subtarget.
+///
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ const PPCSubtarget& Subtarget =
+ static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ if (!Subtarget.hasP8Vector())
+ return false;
+
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
+ return false;
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 4;
+ for (unsigned i = 0; i != 8; i += 4)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
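+/// For illustration (as encoded by isVMerge below): a big-endian two-input
+/// vmrglw (UnitSize 4, ShuffleKind 0) expects the mask
+/// <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>, interleaving the
+/// low-order words of the two inputs.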
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getDataLayout().isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ }
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getDataLayout().isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ }
+}
+
+/**
+ * \brief Common function used to match vmrgew and vmrgow shuffles
+ *
+ * The indexOffset determines whether to look for even or odd words in
+ * the shuffle mask. This is based on the endianness of the target
+ * machine.
+ * - Little Endian:
+ * - Use offset of 0 to check for odd elements
+ * - Use offset of 4 to check for even elements
+ * - Big Endian:
+ * - Use offset of 0 to check for even elements
+ * - Use offset of 4 to check for odd elements
+ * A detailed description of the vector element ordering for little endian and
+ * big endian can be found at
+ * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
+ * Targeting your applications - what little endian and big endian IBM XL C/C++
+ * compiler differences mean to you
+ *
+ * The mask to the shuffle vector instruction specifies the indices of the
+ * elements from the two input vectors to place in the result. The elements are
+ * numbered in array-access order, starting with the first vector. These vectors
+ * are always of type v16i8, so each vector will contain 16 byte-sized
+ * elements. More info on the shuffle vector instruction can be found in the
+ * http://llvm.org/docs/LangRef.html#shufflevector-instruction
+ * Language Reference.
+ *
+ * The RHSStartValue indicates whether the same input vectors are used (unary)
+ * or two different input vectors are used, based on the following:
+ * - If the instruction uses the same vector for both inputs, the range of the
+ * indices will be 0 to 15. In this case, the RHSStart value passed should
+ * be 0.
+ * - If the instruction has two different vectors then the range of the
+ * indices will be 0 to 31. In this case, the RHSStart value passed should
+ * be 16 (indices 0-15 specify elements in the first vector while indices 16
+ * to 31 specify elements in the second vector).
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] IndexOffset Specifies whether to look for even or odd elements
+ * \param[in] RHSStartValue Specifies the starting index for the righthand input
+ * vector to the shuffle_vector instruction
+ * \return true iff this shuffle vector represents an even or odd word merge
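+ *
+ * For illustration (as encoded by the loop below): with IndexOffset 0 and
+ * RHSStartValue 16, the accepted mask is
+ * <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>, i.e. a big-endian vmrgew
+ * of two different inputs.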
+ */
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
+ unsigned RHSStartValue) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
+
+ for (unsigned i = 0; i < 2; ++i)
+ for (unsigned j = 0; j < 4; ++j)
+ if (!isConstantOrUndef(N->getMaskElt(i*4+j),
+ i*RHSStartValue+j+IndexOffset) ||
+ !isConstantOrUndef(N->getMaskElt(i*4+j+8),
+ i*RHSStartValue+j+IndexOffset+8))
+ return false;
+ return true;
+}
+
+/**
+ * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
+ * vmrgow instructions.
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
+ * \param[in] ShuffleKind Identify the type of merge:
+ * - 0 = big-endian merge with two different inputs;
+ * - 1 = either-endian merge with two identical inputs;
+ * - 2 = little-endian merge with two different inputs (inputs are swapped for
+ * little-endian merges).
+ * \param[in] DAG The current SelectionDAG
+ * \return true iff this shuffle mask represents an even or odd word merge
+ */
+bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getDataLayout().isLittleEndian()) {
+ unsigned indexOffset = CheckEven ? 4 : 0;
+ if (ShuffleKind == 1) // Unary
+ return isVMerge(N, indexOffset, 0);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, indexOffset, 16);
+ else
+ return false;
+  } else {
+ unsigned indexOffset = CheckEven ? 0 : 4;
+ if (ShuffleKind == 1) // Unary
+ return isVMerge(N, indexOffset, 0);
+ else if (ShuffleKind == 0) // Normal
+ return isVMerge(N, indexOffset, 16);
+ else
+ return false;
+ }
+ return false;
+}
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
+/// The ShuffleKind distinguishes between big-endian operations with two
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2). For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
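+/// For illustration (following the checks below): with two different inputs on
+/// a big-endian target (ShuffleKind 0), the mask <3,4,5,...,18> yields a shift
+/// amount of 3; for the little-endian swapped form (ShuffleKind 2) the same
+/// mask yields 16 - 3 = 13.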
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return -1;
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+
+ ShiftAmt -= i;
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+
+ if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else if (ShuffleKind == 1) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ } else
+ return -1;
+
+ if (isLE)
+ ShiftAmt = 16 - ShiftAmt;
+
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
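+/// For illustration: with EltSize 4, the v16i8 mask
+/// <12,13,14,15, 12,13,14,15, 12,13,14,15, 12,13,14,15> is a splat of word
+/// element 3 of the first input.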
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // The consecutive indices need to specify an element, not part of two
+ // different elements. So abandon ship early if this isn't the case.
+ if (N->getMaskElt(0) % EltSize != 0)
+ return false;
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
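+/// For illustration: for the word-splat mask <12,13,14,15, ...> discussed
+/// above for isSplatShuffleMask (EltSize 4), this returns 12/4 = 3 on
+/// big-endian targets and (16/4 - 1) - 3 = 0 on little-endian targets, where
+/// the element numbering is reversed.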
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ if (DAG.getDataLayout().isLittleEndian())
+ return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
+ else
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(nullptr, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+ if (!UniquedVals[i&(Multiple-1)].getNode())
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
+
+ LeadingZero &= isNullConstant(UniquedVals[i]);
+ LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (!UniquedVals[Multiple-1].getNode())
+ return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16) // 0,0,0,4 -> vspltisw(4)
+ return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
+ }
+ if (LeadingOnes) {
+ if (!UniquedVals[Multiple-1].getNode())
+ return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (!OpVal.getNode())
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+  // this splat. The only case in which we could fit the replicated bits into
+  // our immediate field would be zero, and we prefer to use vxor for it.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, check if it consists
+ // of a repeated bit pattern of size ByteSize.
+ if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+ return SDValue();
+
+ // Properly sign extend the value.
+ int MaskVal = SignExtend32(Value, ByteSize * 8);
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (SignExtend32<5>(MaskVal) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
+ return SDValue();
+}
+
+/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
+/// amount, otherwise return -1.
+int PPC::isQVALIGNIShuffleMask(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
+ return -1;
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 4) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 4; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+
+ return ShiftAmt;
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+/// SelectAddressRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
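+/// For illustration (following the code below): an ADD of two non-immediate
+/// values is accepted as Base/Index, whereas an ADD whose second operand is a
+/// signed 16-bit constant is rejected here so the constant can instead be
+/// folded into a d-form [r+imm] access.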
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+      return false; // r+i; fold the immediate into a d-form if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.computeKnownBits(N.getOperand(0),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.computeKnownBits(N.getOperand(1),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+ // FIXME: This does not handle the LWA case.
+ if (VT != MVT::i64)
+ return;
+
+ // NOTE: We'll exclude negative FIs here, which come from argument
+ // lowering, because there are no known test cases triggering this problem
+ // using packed structures (or similar). We can remove this exclusion if
+  // we find such a test case. The reason this is so test-case driven is that
+  // this entire 'fixup' exists only to prevent crashes (from the
+ // register scavenger) on not-really-valid inputs. For example, if we have:
+ // %a = alloca i1
+ // %b = bitcast i1* %a to i64*
+  //   store i64 0, i64* %b
+ // then the store should really be marked as 'align 1', but is not. If it
+ // were marked as 'align 1' then the indexed form would have been
+ // instruction-selected initially, and the problem this 'fixup' is preventing
+ // won't happen regardless.
+ if (FrameIdx < 0)
+ return;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ if (Align >= 4)
+ return;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasNonRISpills();
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg. If Aligned is true, only accept displacements
+/// suitable for STD and friends, i.e. multiples of 4.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG,
+ bool Aligned) const {
+ // FIXME dl should come from parent load or store, not from address
+ SDLoc dl(N);
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
+ Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
+ Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if ((CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
+ (!Aligned || (CN->getZExtValue() & 3) == 0)) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
+ MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
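+ // Otherwise, use the address itself as the base with a zero displacement.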
+ Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (DisablePPCPreinc) return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ unsigned Alignment;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ Alignment = LD->getAlignment();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors (except
+ // for QPX, which does have preinc r+r forms).
+ if (VT.isVector()) {
+ if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
+ return false;
+ } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
+ }
+
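+ // Try the r+r (indexed) update form first.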
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
+ AM = ISD::PRE_INC;
+ return true;
+ }
+
+ // LDU/STU can only handle immediates that are a multiple of 4.
+ if (VT != MVT::i64) {
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
+ return false;
+ } else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM,
+ const PPCSubtarget &Subtarget,
+ unsigned &HiOpFlags, unsigned &LoOpFlags,
+ const GlobalValue *GV = nullptr) {
+ HiOpFlags = PPCII::MO_HA;
+ LoOpFlags = PPCII::MO_LO;
+
+ // Don't use the pic base if not in PIC relocation model.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+
+ if (isPIC) {
+ HiOpFlags |= PPCII::MO_PIC_FLAG;
+ LoOpFlags |= PPCII::MO_PIC_FLAG;
+ }
+
+ // If this is a reference to a global value that requires a non-lazy-ptr, make
+ // sure that instruction lowering adds it.
+ if (GV && Subtarget.hasLazyResolverStub(GV)) {
+ HiOpFlags |= PPCII::MO_NLP_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+ if (GV->hasHiddenVisibility()) {
+ HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ }
+ }
+
+ return isPIC;
+}
+
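+/// LowerLabelRef - Build the (hi(Sym) + lo(Sym)) address pair for a label
+/// reference using PPCISD::Hi/Lo; with PIC, the global base register is first
+/// added to the high part.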
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+ SelectionDAG &DAG) {
+ SDLoc DL(HiPart);
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, PtrVT);
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
+
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ if (isPIC)
+ Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
+
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
+
+static void setUsesTOCBasePtr(MachineFunction &MF) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setUsesTOCBasePtr();
+}
+
+static void setUsesTOCBasePtr(SelectionDAG &DAG) {
+ setUsesTOCBasePtr(DAG.getMachineFunction());
+}
+
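+/// getTOCEntry - Emit a PPCISD::TOC_ENTRY load of GA's address out of the
+/// TOC, using X2 as the TOC pointer on 64-bit targets and the global base
+/// register on 32-bit targets.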
+static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
+ SDValue GA) {
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
+ DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
+
+ SDValue Ops[] = { GA, Reg };
+ return DAG.getMemIntrinsicNode(
+ PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
+ false, 0);
+}
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ return getTOCEntry(DAG, SDLoc(CP), true, GA);
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
+ PPCII::MO_PIC_FLAG);
+ return getTOCEntry(DAG, SDLoc(CP), false, GA);
+ }
+
+ SDValue CPIHi =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ SDValue CPILo =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return getTOCEntry(DAG, SDLoc(JT), true, GA);
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ PPCII::MO_PIC_FLAG);
+ return getTOCEntry(DAG, SDLoc(GA), false, GA);
+ }
+
+ SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+ return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
+ const BlockAddress *BA = BASDN->getBlockAddress();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual BlockAddress is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
+ return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
+ SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
+ SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
+ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
+ SDLoc dl(GA);
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ bool is64bit = Subtarget.isPPC64();
+ const Module *M = DAG.getMachineFunction().getFunction()->getParent();
+ PICLevel::Level picLevel = M->getPICLevel();
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
+ SDValue GOTPtr;
+ if (is64bit) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ } else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, GOTPtr);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
+ }
+
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTPtr;
+ if (is64bit) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
+ return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
+ GOTPtr, TGA, TGA);
+ }
+
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTPtr;
+ if (is64bit) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
+ SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
+ PtrVT, GOTPtr, TGA, TGA);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
+ PtrVT, TLSAddr, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
+
+ llvm_unreachable("Unknown TLS model!");
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ SDLoc DL(GSDN);
+ const GlobalValue *GV = GSDN->getGlobal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+ return getTOCEntry(DAG, DL, true, GA);
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+ GSDN->getOffset(),
+ PPCII::MO_PIC_FLAG);
+ return getTOCEntry(DAG, DL, false, GA);
+ }
+
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
+
+ SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
+
+ // If the global reference is actually to a non-lazy-pointer, we have to do an
+ // extra load to get the address of the global.
+ if (MOHiFlag & PPCII::MO_NLP_FLAG)
+ Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+ false, false, false, 0);
+ return Ptr;
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc dl(Op);
+
+ if (Op.getValueType() == MVT::v2i64) {
+ // When the operands themselves are v2i64 values, we need to do something
+ // special because VSX has no underlying comparison operations for these.
+ if (Op.getOperand(0).getValueType() == MVT::v2i64) {
+ // Equality can be handled by casting to the legal type for Altivec
+ // comparisons, everything else needs to be expanded.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getSetCC(dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
+ CC));
+ }
+
+ return SDValue();
+ }
+
+ // We handle most of these in the usual way.
+ return Op;
+ }
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
+ EVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ EVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ SDLoc dl(Node);
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // Check if GprIndex is even
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, dl, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, dl, MVT::i32));
+ // Align GprIndex to be even if it isn't
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
+
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, dl, MVT::i32));
+
+ // fpr
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, false, 0);
+ InChain = FprIndex.getValue(1);
+
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, dl, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, dl, MVT::i32));
+
+ // areas
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // select overflow_area if index >= 8
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
+
+ // adjustment constant gpr_index * 4/8
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, dl, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
+ MVT::i32));
+
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 if gpr/fpr >= 8
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ dl, MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+ // We have to copy the entire va_list struct:
+ // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
+ return DAG.getMemcpy(Op.getOperand(0), Op,
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
+ false, MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ SDLoc dl(Op);
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ bool isPPC64 = (PtrVT == MVT::i64);
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ std::move(Args), 0);
+
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ SDLoc dl(Op);
+
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+ }
+
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
+ // We assume the given va_list has already been allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
+
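+ // Seed the gpr and fpr fields with the number of fixed arguments already
+ // passed in registers.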
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
+ PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ PtrVT);
+
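+ // Byte deltas between successive va_list fields: +1 from gpr to fpr,
+ // +3 from fpr to overflow_arg_area, +4 from overflow_arg_area to
+ // reg_save_area (32-bit SVR4, so pointers are 4 bytes).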
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte : number of int regs
+ SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1),
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDValue secondStore =
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+ MachinePointerInfo(SV, nextOffset), MVT::i8,
+ false, false, 0);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // If there is only one Floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+/// FPR - The set of FP registers that should be allocated for arguments,
+/// on Darwin.
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+ PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+ PPC::F11, PPC::F12, PPC::F13};
+
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned ArgSize = ArgVT.getStoreSize();
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
+
+ // Round up to multiples of the pointer size, except for array members,
+ // which are always packed.
+ if (!Flags.isInConsecutiveRegs())
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned Align = PtrByteSize;
+
+ // Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
+ ArgVT == MVT::v1i128)
+ Align = 16;
+ // QPX vector types stored in double-precision are padded to a 32 byte
+ // boundary.
+ else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
+ Align = 32;
+
+ // ByVal parameters are aligned as requested.
+ if (Flags.isByVal()) {
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > PtrByteSize) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ Align = BVAlign;
+ }
+ }
+
+ // Array members are always packed to their original alignment.
+ if (Flags.isInConsecutiveRegs()) {
+ // If the array member was split into multiple registers, the first
+ // needs to be aligned to the size of the full type. (Except for
+ // ppcf128, which is only aligned as its f64 components.)
+ if (Flags.isSplit() && OrigVT != MVT::ppcf128)
+ Align = OrigVT.getStoreSize();
+ else
+ Align = ArgVT.getStoreSize();
+ }
+
+ return Align;
+}
+
+/// CalculateStackSlotUsed - Return whether this argument will use its
+/// stack slot (instead of being passed in registers). ArgOffset,
+/// AvailableFPRs, and AvailableVRs must hold the current argument
+/// position, and will be updated to account for this argument.
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize,
+ unsigned LinkageSize,
+ unsigned ParamAreaSize,
+ unsigned &ArgOffset,
+ unsigned &AvailableFPRs,
+ unsigned &AvailableVRs, bool HasQPX) {
+ bool UseMemory = false;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ // If there's no space left in the argument save area, we must
+ // use memory (this check also catches zero-sized arguments).
+ if (ArgOffset >= LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // Allocate argument on the stack.
+ ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // If we overran the argument save area, we must use memory
+ // (this check catches arguments passed partially in memory)
+ if (ArgOffset > LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // However, if the argument is actually passed in an FPR or a VR,
+ // we don't use memory after all.
+ if (!Flags.isByVal()) {
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
+ // QPX registers overlap with the scalar FP registers.
+ (HasQPX && (ArgVT == MVT::v4f32 ||
+ ArgVT == MVT::v4f64 ||
+ ArgVT == MVT::v4i1)))
+ if (AvailableFPRs > 0) {
+ --AvailableFPRs;
+ return false;
+ }
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
+ ArgVT == MVT::v1i128)
+ if (AvailableVRs > 0) {
+ --AvailableVRs;
+ return false;
+ }
+ }
+
+ return UseMemory;
+}
+
+/// EnsureStackAlignment - Round stack frame size up from NumBytes to
+/// ensure minimum alignment required for target.
+static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
+ unsigned NumBytes) {
+ unsigned TargetAlign = Lowering->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ return NumBytes;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
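+ // Dispatch to the ABI-specific lowering: 64-bit SVR4, 32-bit SVR4, or Darwin.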
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
+ return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ }
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_32SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // 32-bit SVR4 ABI Stack Frame Layout:
+ // +-----------------------------------+
+ // +--> | Back chain |
+ // | +-----------------------------------+
+ // | | Floating-point register save area |
+ // | +-----------------------------------+
+ // | | General register save area |
+ // | +-----------------------------------+
+ // | | CR save word |
+ // | +-----------------------------------+
+ // | | VRSAVE save word |
+ // | +-----------------------------------+
+ // | | Alignment padding |
+ // | +-----------------------------------+
+ // | | Vector register save area |
+ // | +-----------------------------------+
+ // | | Local variable space |
+ // | +-----------------------------------+
+ // | | Parameter list area |
+ // | +-----------------------------------+
+ // | | LR save word |
+ // | +-----------------------------------+
+ // SP--> +--- | Back chain |
+ // +-----------------------------------+
+ //
+ // Specifications:
+ // System V Application Binary Interface PowerPC Processor Supplement
+ // AltiVec Technology Programming Interface Manual
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ const TargetRegisterClass *RC;
+ EVT ValVT = VA.getValVT();
+
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i1:
+ case MVT::i32:
+ RC = &PPC::GPRCRegClass;
+ break;
+ case MVT::f32:
+ if (Subtarget.hasP8Vector())
+ RC = &PPC::VSSRCRegClass;
+ else
+ RC = &PPC::F4RCRegClass;
+ break;
+ case MVT::f64:
+ if (Subtarget.hasVSX())
+ RC = &PPC::VSFRCRegClass;
+ else
+ RC = &PPC::F8RCRegClass;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ RC = &PPC::VRRCRegClass;
+ break;
+ case MVT::v4f32:
+ RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ RC = &PPC::VSHRCRegClass;
+ break;
+ case MVT::v4f64:
+ RC = &PPC::QFRCRegClass;
+ break;
+ case MVT::v4i1:
+ RC = &PPC::QBRCRegClass;
+ break;
+ }
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+
+ InVals.push_back(ArgValue);
+ } else {
+ // Argument stored in memory.
+ assert(VA.isMemLoc());
+
+ unsigned ArgSize = VA.getLocVT().getStoreSize();
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, false, 0));
+ }
+ }
+
+ // Assign locations to all of the incoming aggregate by value arguments.
+ // Aggregates passed by value are stored in the local variable space of the
+ // caller's stack frame, right above the parameter list area.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ MinReservedArea = std::max(MinReservedArea, LinkageSize);
+
+ // Set the size that is at least reserved in the caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ SmallVector<SDValue, 8> MemOps;
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ static const MCPhysReg GPArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+
+ static const MCPhysReg FPArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+
+ if (Subtarget.useSoftFloat())
+ NumFPArgRegs = 0;
+
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
+
+ // Make room for NumGPArgRegs and NumFPArgRegs.
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
+ NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
+
+ FuncInfo->setVarArgsStackOffset(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
+
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
+ // The double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ return Chain;
+}
+
+// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+// value to MVT::i64 and then truncate to the correct register size.
+SDValue
+PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
+ SelectionDAG &DAG, SDValue ArgVal,
+ SDLoc dl) const {
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_64SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 8;
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof(VR);
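+ // QPX registers overlap the scalar FPRs, so they share the FPR count.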
+ const unsigned Num_QFPR_Regs = Num_FPR_Regs;
+
+ // Do a first pass over the arguments to determine whether the ABI
+ // guarantees that our caller has allocated the parameter save area
+ // on its stack frame. In the ELFv1 ABI, this is always the case;
+ // in the ELFv2 ABI, it is true if this is a vararg function or if
+ // any parameter is located in a stack slot.
+
+ bool HasParameterArea = !isELFv2ABI || isVarArg;
+ unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
+ unsigned NumBytes = LinkageSize;
+ unsigned AvailableFPRs = Num_FPR_Regs;
+ unsigned AvailableVRs = Num_VR_Regs;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (Ins[i].Flags.isNest())
+ continue;
+
+ if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
+ HasParameterArea = true;
+ }
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
+ SmallVector<SDValue, 8> MemOps;
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ EVT OrigVT = Ins[ArgNo].ArgVT;
+ unsigned ObjSize = ObjectVT.getStoreSize();
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ if (Ins[ArgNo].isOrigArg()) {
+ std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+ }
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ unsigned CurArgOffset, Align;
+ auto ComputeArgOffset = [&]() {
+ /* Respect alignment of argument on the stack. */
+ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ CurArgOffset = ArgOffset;
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputeArgOffset();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ }
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Empty aggregate parameters do not take up registers. Examples:
+ // struct { } a;
+ // union { } b;
+ // int c[0];
+ // etc. However, we have to provide a place-holder in InVals, so
+ // pretend we have an 8-byte item at the current address for that
+ // purpose.
+ if (!ObjSize) {
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ continue;
+ }
+
+ // Create a stack object covering all stack doublewords occupied
+ // by the argument. If the argument is (fully or partially) on
+ // the stack, or if the argument is fully in registers but the
+ // caller has allocated the parameter save anyway, we can refer
+ // directly to the caller's stack frame. Otherwise, create a
+ // local copy in our own frame.
+ int FI;
+ if (HasParameterArea ||
+ ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
+ else
+ FI = MFI->CreateStackObject(ArgSize, Align, false);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+
+ // Handle aggregates smaller than 8 bytes.
+ if (ObjSize < PtrByteSize) {
+ // The value of the object is its address, which differs from the
+ // address of the enclosing doubleword on big-endian systems.
+ SDValue Arg = FIN;
+ if (!isLittleEndian) {
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
+ Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
+ }
+ InVals.push_back(Arg);
+
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store;
+
+ if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
+ MachinePointerInfo(&*FuncArg), ObjType,
+ false, false, 0);
+ } else {
+ // For sizes that don't fit a truncating store (3, 5, 6, 7),
+ // store the whole register as-is to the parameter save area
+ // slot.
+ Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(&*FuncArg), false, false, 0);
+ }
+
+ MemOps.push_back(Store);
+ }
+ // Whether we copied from a register or not, advance the offset
+ // into the parameter save area by a full doubleword.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ // The value of the object is its address, which is the address of
+ // its first stack doubleword.
+ InVals.push_back(FIN);
+
+ // Store whatever pieces of the object are in registers to memory.
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ if (GPR_idx == Num_GPR_Regs)
+ break;
+
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Addr = FIN;
+ if (j) {
+ SDValue Off = DAG.getConstant(j, dl, PtrVT);
+ Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
+ }
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(&*FuncArg, j), false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+ ArgOffset += ArgSize;
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (Flags.isNest()) {
+ // The 'nest' parameter, if any, is passed in R11.
+ unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ break;
+ }
+
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // float aggregates.
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx],
+ Subtarget.hasP8Vector()
+ ? &PPC::VSSRCRegClass
+ : &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
+ ? &PPC::VSFRCRegClass
+ : &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
+ // This can only ever happen in the presence of f32 array types,
+ // since otherwise we never run out of FPRs before running out
+ // of GPRs.
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::f32) {
+ if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
+ ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, dl, MVT::i32));
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
+ needsLoad = true;
+ }
+
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ if (CallConv != CallingConv::Fast || needsLoad) {
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v1i128:
+ if (!Subtarget.hasQPX()) {
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+ MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+ MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++VR_idx;
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
+ needsLoad = true;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 16;
+ break;
+ } // not QPX
+
+ assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+ /* fall through */
+
+ case MVT::v4f64:
+ case MVT::v4i1:
+ // QPX vectors are treated like their scalar floating-point subregisters
+ // (except that they're larger).
+ unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
+ if (QFPR_idx != Num_QFPR_Regs) {
+ const TargetRegisterClass *RC;
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
+ case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
+ default: RC = &PPC::QBRCRegClass; break;
+ }
+
+ unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++QFPR_idx;
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+ needsLoad = true;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += Sz;
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ if (ObjSize < ArgSize && !isLittleEndian)
+ CurArgOffset += ArgSize - ObjSize;
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea;
+ if (HasParameterArea)
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ else
+ MinReservedArea = LinkageSize;
+
+ // Set the size that is at least reserved in the caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrByteSize, Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing the
+ // result of va_next.
+ for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx < Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by PtrByteSize for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_Darwin(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+ bool isPPC64 = PtrVT == MVT::i64;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ unsigned ArgOffset = LinkageSize;
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof(VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples, but we have to walk the arglist to figure that
+ // out... For the pathological case, compute VecArgOffset as the start of
+ // the vector parameter area. Computing VecArgOffset is the entire point of
+ // the following loop.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
+ ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of regs.
+ unsigned ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ // FIXME: We are guaranteed to be !isPPC64 at this point.
+ // Does MVT::i64 apply?
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at non-vector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
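+ // Illustrative only (hypothetical arglist): three i32 args and a 20-byte
+ // byval struct leave VecArgOffset at LinkageSize + 32 after the loop; the
+ // round-up and the 12*16 = 192 byte skip above then give the start of the
+ // vector parameter area.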
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ if (Ins[ArgNo].isOrigArg()) {
+ std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].getOrigArgIndex();
+ }
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
+
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple
+ // of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(&*FuncArg),
+ ObjType, false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+
+ ArgOffset += PtrByteSize;
+
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(&*FuncArg, j), false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i32:
+ if (!isPPC64) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i1)
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // All FP arguments reserve stack space in the Darwin ABI.
+ ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by the pointer size for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ return Chain;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
+ unsigned ParamSize) {
+
+ if (!isTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
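+ // Worked example (hypothetical sizes): a caller that reserved 112 bytes
+ // calling a callee whose parameters need 176 bytes gives SPDiff = -64, i.e.
+ // the stack must be grown by 64 bytes for the tail call.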
+ // Remember only if the new adjustment is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
+ return false;
+
+ // Variable argument functions are not supported.
+ if (isVarArg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != Ins.size(); i++) {
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Flags.isByVal()) return false;
+ }
+
+ // Non-PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+
+ return false;
+}
+
+/// isBLACompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return nullptr;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ SignExtend32<26>(Addr) != Addr)
+ return nullptr; // Top 6 bits have to be sext of immediate.
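+ // Illustrative check (hypothetical values): 0x01FFFFFC has its low two bits
+ // clear and sign-extends from 26 bits to itself, so it is accepted and the
+ // returned immediate is 0x01FFFFFC >> 2; 0x05000000 fails the sign-extension
+ // test and is rejected.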
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
+ DAG.getTargetLoweringInfo().getPointerTy(
+ DAG.getDataLayout())).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ SDLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to framepointer.
+ MemOpChains.push_back(DAG.getStore(
+ Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isDarwinABI,
+ SDLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ const PPCFrameLowering *FL =
+ MF.getSubtarget<PPCSubtarget>().getFrameLowering();
+ int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(
+ Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr),
+ false, false, 0);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(
+ Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx),
+ false, false, 0);
+ }
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember the argument for later processing. Calculate
+/// the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTailCallLoadFPAndRetAddr - Emit loads from the return address and frame
+/// pointer stack slots. Returns the chain as result and the loaded values in
+/// LROpOut/FPOpOut. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ SDLoc dl) const {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
+ false, false, false, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
+ false, false, false, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, false, false, MachinePointerInfo(),
+ MachinePointerInfo());
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
+ SDLoc dl) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, dl, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+static
+void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+ SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDValue LROp, SDValue FPOp, bool isDarwinABI,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isDarwinABI, dl);
+
+ // Emit callseq_end just before tailcall node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
+ InFlag = Chain.getValue(1);
+}
+
+// Is this global address that of a function that can be called by name (as
+// opposed to something that must hold a descriptor for an indirect call)?
+static bool isFunctionGlobalAddress(SDValue Callee) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
+ Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+
+ return G->getGlobal()->getType()->getElementType()->isFunctionTy();
+ }
+
+ return false;
+}
+
+static
+unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
+ SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
+ bool isTailCall, bool IsPatchPoint, bool hasNest,
+ SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
+ ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
+
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+
+ unsigned CallOpc = PPCISD::CALL;
+
+ bool needIndirectCall = true;
+ if (!isSVR4ABI || !isPPC64)
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
+
+ if (isFunctionGlobalAddress(Callee)) {
+ GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ // A call to a TLS address is actually an indirect call to a
+ // thread-specific pointer.
+ unsigned OpFlags = 0;
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ !G->getGlobal()->isStrongDefinitionForLinker()) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ !G->getGlobal()->hasLocalLinkage() &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_PLT_OR_STUB;
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(), 0, OpFlags);
+ needIndirectCall = false;
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_PLT_OR_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+ OpFlags);
+ needIndirectCall = false;
+ }
+
+ if (IsPatchPoint) {
+ // We'll form an invalid direct call when lowering a patchpoint; the full
+ // sequence for an indirect call is complicated, and many of the
+ // instructions introduced might have side effects (and, thus, can't be
+ // removed later). The call itself will be removed as soon as the
+ // argument/return lowering is complete, so the fact that it has the wrong
+ // kind of operands should not really matter.
+ needIndirectCall = false;
+ }
+
+ if (needIndirectCall) {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call; we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // The loads are scheduled at the beginning of the call sequence, and the
+ // register copies are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // copies together, a TOC access in the caller could be scheduled between
+ // the assignment of the callee TOC and the branch to the callee, which
+ // results in the TOC access going through the TOC of the callee instead
+ // of going through the TOC of the caller, which leads to incorrect code.
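+ // A schematic of the resulting sequence for a call through a function
+ // pointer held in r3 (illustrative; register choices and the exact TOC save
+ // offset depend on the ABI variant):
+ //   std   r2, 40(r1)   ; save caller TOC (ELFv1 TOC save slot)
+ //   ld    r0, 0(r3)    ; function entry point from the descriptor
+ //   mtctr r0
+ //   ld    r2, 8(r3)    ; callee TOC
+ //   ld    r11, 16(r3)  ; environment pointer
+ //   bctrl
+ //   ld    r2, 40(r1)   ; restore caller TOC after the call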
+
+ // Load the address of the function entry point from the function
+ // descriptor.
+ SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
+ if (LDChain.getValueType() == MVT::Glue)
+ LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
+
+ bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
+
+ MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
+ SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
+ false, false, LoadsInv, 8);
+
+ // Load environment pointer into r11.
+ SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+ SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
+ MPI.getWithOffset(16), false, false,
+ LoadsInv, 8);
+
+ SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
+ SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
+ MPI.getWithOffset(8), false, false,
+ LoadsInv, 8);
+
+ setUsesTOCBasePtr(DAG);
+ SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
+ InFlag);
+ Chain = TOCVal.getValue(0);
+ InFlag = TOCVal.getValue(1);
+
+ // If the function call has an explicit 'nest' parameter, it takes the
+ // place of the environment pointer.
+ if (!hasNest) {
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
+
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+ }
+
+ MTCTROps[0] = Chain;
+ MTCTROps[1] = LoadFuncPtr;
+ MTCTROps[2] = InFlag;
+ }
+
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ Ops.push_back(Chain);
+ CallOpc = PPCISD::BCTRL;
+ Callee.setNode(nullptr);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+ // into the call.
+ if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
+ setUsesTOCBasePtr(DAG);
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+ }
+
+ return CallOpc;
+}
+
+static
+bool isLocalCall(const SDValue &Callee)
+{
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->isStrongDefinitionForLinker();
+ return false;
+}
+
+SDValue
+PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
+ bool isTailCall, bool isVarArg, bool IsPatchPoint,
+ bool hasNest, SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue CallSeqStart, SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
+
+ std::vector<EVT> NodeTys;
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
+ SPDiff, isTailCall, IsPatchPoint, hasNest,
+ RegsToPass, Ops, NodeTys, CS, Subtarget);
+
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ assert(((Callee.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(Callee)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
+ }
+
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // thus have a different TOC, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
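+ // Illustrative of typical linker behaviour (not code emitted here): a
+ // cross-module
+ //   bl callee
+ //   nop
+ // pair may be rewritten by the linker into a call to a TOC-saving stub
+ // followed by a load of r2 from the TOC save slot, restoring the caller's
+ // TOC after the call returns.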
+
+ if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
+ !IsPatchPoint) {
+ if (CallOpc == PPCISD::BCTRL) {
+ // This is a call through a function pointer.
+ // Restore the caller TOC from the save area into R2.
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ // We are using a target-specific load with r2 hard coded, because the
+ // result of a target-independent load would never go directly into r2,
+ // since r2 is a reserved register (which prevents the register allocator
+ // from allocating it), resulting in an additional register being
+ // allocated and an unnecessary move instruction being generated.
+ CallOpc = PPCISD::BCTRL_LOAD_TOC;
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+
+ // The address needs to go after the chain input but before the flag (or
+ // any other variadic arguments).
+ Ops.insert(std::next(Ops.begin()), AddTOC);
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_))
+ // Otherwise insert NOP for non-local calls.
+ CallOpc = PPCISD::CALL_NOP;
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ DAG.getIntPtrConstant(BytesCalleePops, dl, true),
+ InFlag, dl);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+ bool IsPatchPoint = CLI.IsPatchPoint;
+ ImmutableCallSite *CS = CLI.CS;
+
+ if (isTailCall)
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ Ins, DAG);
+
+ if (!isTailCall && CS && CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
+ else
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
+ }
+
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall, bool IsPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
+ // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
+ // of the 32-bit SVR4 ABI stack frame layout.
+
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "Unknown calling convention!");
+
+ unsigned PtrByteSize = 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamic
+ // stack allocation and for restoring the caller's stack pointer in this
+ // function's epilog. This is done because, by tail calling, the called
+ // function might overwrite the value in this function's (MF) stack pointer
+ // stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, parameter list area and the part of the local variable space which
+ // contains copies of aggregates which are passed by value.
+
+ // Assign locations to all of the outgoing arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
+ PtrByteSize);
+
+ if (isVarArg) {
+ // Handle fixed and variable vector arguments differently.
+ // Fixed vector arguments go into registers as long as registers are
+ // available. Variable vector arguments always go into memory.
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool Result;
+
+ if (Outs[i].IsFixed) {
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
+ } else {
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
+ }
+
+ if (Result) {
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+ } else {
+ // All arguments are treated the same.
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
+ }
+
+ // Assign locations to all of the outgoing aggregate by value arguments.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
+
+ // Size of the linkage area, parameter list area and the part of the local
+ // variable space where copies of aggregates which are passed by value are
+ // stored.
+ unsigned NumBytes = CCByValInfo.getNextStackOffset();
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ dl);
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ bool seenFloatArg = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Argument is an aggregate which is passed by value, thus we need to
+ // create a copy of it in the local variable space of the current stack
+ // frame (which is the stack frame of the caller) and pass the address of
+ // this copy to the callee.
+ assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+ CCValAssign &ByValVA = ByValArgLocs[j++];
+ assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+
+ // Memory reserved in the local variable space of the caller's stack frame.
+ unsigned LocMemOffset = ByValVA.getLocMemOffset();
+
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
+ StackPtr, PtrOff);
+
+ // Create a copy of the argument in the local area of the current
+ // stack frame.
+ SDValue MemcpyCall =
+ CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Pass the address of the aggregate copy on the stack either in a
+ // physical register or in the parameter list area of the current stack
+ // frame to the callee.
+ Arg = PtrOff;
+ }
+
+ if (VA.isRegLoc()) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
+ // Put argument in a physical register.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Put argument in the parameter list area of the current stack frame.
+ assert(VA.isMemLoc());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+
+ if (!isTailCall) {
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
+ StackPtr, PtrOff);
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ } else {
+ // Calculate and remember argument location.
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+ TailCallArguments);
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, InFlag };
+
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
+
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+ false, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
+}
+
+// Copy an argument into memory, being careful to do this outside the
+// call sequence for the call to which the argument belongs.
+SDValue
+PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart,
+ ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ return NewCallSeqStart;
+}
+
+SDValue
+PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall, bool IsPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
+
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ unsigned NumOps = Outs.size();
+ bool hasNest = false;
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ unsigned PtrByteSize = 8;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamic
+ // stack allocation and for restoring the caller's stack pointer in this
+ // function's epilog. This is done because, by tail calling, the called
+ // function might overwrite the value in this function's (MF) stack pointer
+ // stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
+ // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
+ // area is 32 bytes reserved space for [SP][CR][LR][TOC].
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ unsigned NumBytes = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+ const unsigned NumQFPRs = NumFPRs;
+
+ // When using the fast calling convention, we don't provide backing for
+ // arguments that will be in registers.
+ unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ if (Flags.isNest())
+ continue;
+
+ if (CallConv == CallingConv::Fast) {
+ if (Flags.isByVal())
+ NumGPRsUsed += (Flags.getByValSize()+7)/8;
+ else
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (++NumGPRsUsed <= NumGPRs)
+ continue;
+ break;
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v1i128:
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ break;
+ case MVT::v4f32:
+ // When using QPX, this is handled like a FP register, otherwise, it
+ // is an Altivec register.
+ if (Subtarget.hasQPX()) {
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ } else {
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f64: // QPX
+ case MVT::v4i1: // QPX
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ break;
+ }
+ }
+
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
+
+ unsigned NumBytesActuallyUsed = NumBytes;
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if the callee
+ // is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
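+ // e.g. with the 48-byte ELFv1 linkage area described above, this floor is
+ // 48 + 8 * 8 = 112 bytes.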
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ dl);
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = LinkageSize;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ // We re-align the argument offset for each argument, except under the fast
+ // calling convention, where we defer this until we know the argument will
+ // actually use a stack slot.
+ auto ComputePtrOff = [&]() {
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+ PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputePtrOff();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+ }
+
+ // Promote integers to 64-bit values.
+ if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal()) {
+ // Note: Size includes alignment padding, so
+ // struct x { short a; char b; }
+ // will have Size = 4. With #pragma pack(1), it will have Size = 3.
+ // These are the proper values we need for right-justifying the
+ // aggregate in a parameter register.
+ unsigned Size = Flags.getByValSize();
+
+ // An empty aggregate parameter takes up no storage and no
+ // registers.
+ if (Size == 0)
+ continue;
+
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (Size==1 || Size==2 || Size==4) {
+ EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ }
+
+ if (GPR_idx == NumGPRs && Size < 8) {
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
+ PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+
+ // FIXME: The above statement is likely due to a misunderstanding of the
+ // documents. All arguments must be copied into the parameter area BY
+ // THE CALLEE in the event that the callee takes the address of any
+ // formal argument. That has not yet been implemented. However, it is
+ // reasonable to use the stack area as a staging area for the register
+ // load.
+
+ // Skip this for small aggregates, as we will use the same slot for a
+ // right-justified copy, below.
+ if (Size >= 8)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // When a register is available, pass a small aggregate right-justified.
+ if (Size < 8 && GPR_idx != NumGPRs) {
+ // The easiest way to get this right-justified in a register
+ // is to copy the structure into the rightmost portion of a
+ // local variable slot, then load the whole slot into the
+ // register.
+ // FIXME: The memcpy seems to produce pretty awful code for
+ // small aggregates, particularly for packed ones.
+ // FIXME: It would be preferable to use the slot in the
+ // parameter save area instead of a new local variable.
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // Load the slot into the register.
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ // Done with this argument.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ // For aggregates larger than PtrByteSize, copy the pieces of the
+ // object that fit into registers from the parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (Flags.isNest()) {
+ // The 'nest' parameter, if any, is passed in R11.
+ RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
+ hasNest = true;
+ break;
+ }
+
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += PtrByteSize;
+ }
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64: {
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // float aggregates.
+
+ // Named arguments go into FPRs first, and once they overflow, the
+ // remaining arguments go into GPRs and then the parameter save area.
+ // Unnamed arguments for vararg functions always go to GPRs and
+ // then the parameter save area. For now, put all arguments to vararg
+ // routines always in both locations (FPR *and* GPR or stack slot).
+ bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+ bool NeededLoad = false;
+
+ // First load the argument into the next available FPR.
+ if (FPR_idx != NumFPRs)
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ // Next, load the argument into GPR or stack slot if needed.
+ if (!NeedGPROrStack)
+ ;
+ else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
+ // In the non-vararg case, this can only ever happen in the
+ // presence of f32 array types, since otherwise we never run
+ // out of FPRs before running out of GPRs.
+ SDValue ArgVal;
+
+ // Double values are always passed in a single GPR.
+ if (Arg.getValueType() != MVT::f32) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+
+ // Non-array float values are extended and passed in a GPR.
+ } else if (!Flags.isInConsecutiveRegs()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+
+ // If we have an array of floats, we collect every odd element
+ // together with its predecessor into one GPR.
+ } else if (ArgOffset % PtrByteSize != 0) {
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (!isLittleEndian)
+ std::swap(Lo, Hi);
+ ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
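+ // (On big-endian targets the swap above places the earlier, even-index
+ // element in the most-significant half of the GPR, matching its lower
+ // address in memory.)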
+
+ // The final element, if even, goes into the first half of a GPR.
+ } else if (Flags.isInConsecutiveRegsLast()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+ if (!isLittleEndian)
+ ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, dl, MVT::i32));
+
+ // Non-final even elements are skipped; they will be handled
+ // together with the subsequent argument on the next go-around.
+ } else
+ ArgVal = SDValue();
+
+ if (ArgVal.getNode())
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ // Single-precision floating-point values are mapped to the
+ // second (rightmost) word of the stack doubleword.
+ if (Arg.getValueType() == MVT::f32 &&
+ !isLittleEndian && !Flags.isInConsecutiveRegs()) {
+ SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ }
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+
+ NeededLoad = true;
+ }
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ if (CallConv != CallingConv::Fast || NeededLoad) {
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
+ break;
+ }
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v1i128:
+ if (!Subtarget.hasQPX()) {
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+
+ // For a varargs call, named arguments go into VRs or on the stack as
+ // usual; unnamed arguments always go to the stack or the corresponding
+ // GPRs when within range. For now, we always put the value in both
+ // locations (or even all three).
+ if (isVarArg) {
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, dl, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params go into VRs or on the stack.
+ if (VR_idx != NumVRs) {
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Arg));
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += 16;
+ }
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += 16;
+ break;
+ } // not QPX
+
+ assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+
+ /* fall through */
+ case MVT::v4f64:
+ case MVT::v4i1: {
+ bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
+ if (isVarArg) {
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (QFPR_idx != NumQFPRs) {
+ SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
+ Store, PtrOff, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
+ }
+ ArgOffset += (IsF32 ? 16 : 32);
+ for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, dl, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs QPX params go into registers or on the stack.
+ if (QFPR_idx != NumQFPRs) {
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ }
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ break;
+ }
+ }
+ }
+
+ assert(NumBytesActuallyUsed == ArgOffset);
+ (void)NumBytesActuallyUsed;
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ // Check if this is an indirect call (MTCTR/BCTRL).
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ if (!isTailCall && !IsPatchPoint &&
+ !isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
+ // Load r2 into a virtual register and store it to the TOC save area.
+ setUsesTOCBasePtr(DAG);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset),
+ false, false, 0);
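+ // The function reached through the pointer may use a different TOC, so the
+ // value stashed here is what the call-return sequence reloads into r2 after
+ // the indirect call.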
+ // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+ // This does not mean the MTCTR instruction must use R12; it's easier
+ // to model this as an extra parameter, so do that.
+ if (isELFv2ABI && !IsPatchPoint)
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest,
+ DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
+ SPDiff, NumBytes, Ins, InVals, CS);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall, bool IsPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamic
+ // allocation and for restoring the caller's stack pointer in this function's
+ // epilogue. This is done because a tail-called function might overwrite the
+ // value in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
+ dl);
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point arguments
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ // Very small objects are passed right-justified. Everything else is
+ // passed left-justified.
+ if (Size==1 || Size==2) {
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+ // copy the pieces of the object that fit into registers from the
+ // parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ } else
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ellipsis (...). We do it for all
+ // arguments, which seems to work.
+ while (ArgOffset % 16 !=0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, dl, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, dl, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers, emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip 1st 12 params which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ // On Darwin, R12 must contain the address of an indirect callee. This does
+ // not mean the MTCTR instruction must use R12; it's easier to model this as
+ // an extra parameter, so do that.
+ if (!isTailCall &&
+ !isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG))
+ RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
+ PPC::R12), Callee));
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
+}
+
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[i];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
+}
+
+SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(
+ SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const {
+ SDLoc dl(Op);
+
+ // Get the correct type for integers.
+ EVT IntVT = Op.getValueType();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNAREAOFFSET node.
+ SDValue Ops[2] = {Chain, FPSIdx};
+ SDVTList VTs = DAG.getVTList(IntVT);
+ return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // When we pop the dynamic allocation we need to restore the SP link.
+ SDLoc dl(Op);
+
+ // Get the correct type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+
+ // Construct the stack pointer operand.
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = Subtarget.isPPC64();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+
+ // Get the current return address save index.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the return address save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out what the fixed offset of the return address save area is.
+ int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
+ // Allocate the frame index for the return address save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = Subtarget.isPPC64();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out what the fixed offset of the frame pointer save area is.
+ int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // Get the correct type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, dl, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return LowerVectorLoad(Op, DAG);
+
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 loads");
+
+ // First, load 8 bits into 32 bits, then truncate to 1 bit.
+
+ SDLoc dl(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand *MMO = LD->getMemOperand();
+
+ SDValue NewLD =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
+ BasePtr, MVT::i8, MMO);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+ SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(1).getValueType().isVector())
+ return LowerVectorStore(Op, DAG);
+
+ assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 stores");
+
+ // First, zero extend to 32 bits, then use a truncating store to 8 bits.
+
+ SDLoc dl(Op);
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ MachineMemOperand *MMO = ST->getMemOperand();
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
+ Value);
+ return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+// FIXME: Remove this once the ANDI glue bug is fixed:
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 results");
+
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+ Op.getOperand(0));
+}
+
+/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
+/// instruction when possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ // We might be able to do better than this under some circumstances, but in
+ // general, fsel-based lowering of select is a finite-math-only optimization.
+ // For more information, see section F.3 of the 2.06 ISA specification.
+ if (!DAG.getTarget().Options.NoInfsFPMath ||
+ !DAG.getTarget().Options.NoNaNsFPMath)
+ return Op;
+ // TODO: Propagate flags from the select rather than global settings.
+ SDNodeFlags Flags;
+ Flags.setNoInfs(true);
+ Flags.setNoNaNs(true);
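+ // fsel computes (A >= 0.0) ? B : C, so each predicate below is expressed in
+ // terms of the sign of LHS, LHS - RHS, or RHS - LHS, swapping the true and
+ // false operands where necessary.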
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ EVT ResVT = Op.getValueType();
+ EVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ SDLoc dl(Op);
+
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ SDValue Sel1;
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ // Emit a store to the stack slot.
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias.
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, dl, FIPtr.getValueType()));
+ MPI = MPI.getWithOffset(4);
+ }
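+ // (The fctiwz result occupies the low-order word of the FPR, which the
+ // 8-byte big-endian store above places at offset 4 of the slot; hence the
+ // bias.)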
+
+ RLI.Chain = Chain;
+ RLI.Ptr = FIPtr;
+ RLI.MPI = MPI;
+}
+
+/// \brief Custom lowers floating point to integer conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
+ break;
+ }
+ return Tmp;
+}
+
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const {
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
+ return LowerFP_TO_INTDirectMove(Op, DAG, dl);
+
+ ReuseLoadInfo RLI;
+ LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+
+ return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+ false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+ RLI.Ranges);
+}
+
+// We're trying to insert a regular store, S, and then a load, L. If the
+// incoming value, O, is a load, we might just be able to have our load use the
+// address used by O. However, we don't know if anything else will store to
+// that address before we can load from it. To prevent this situation, we need
+// to insert our load, L, into the chain as a peer of O. To do this, we give L
+// the same chain operand as O, we create a token factor from the chain results
+// of O and L, and we replace all uses of O's chain result with that token
+// factor (see spliceIntoChain below for this last part).
+bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
+ ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ ISD::LoadExtType ET) const {
+ SDLoc dl(Op);
+ if (ET == ISD::NON_EXTLOAD &&
+ (Op.getOpcode() == ISD::FP_TO_UINT ||
+ Op.getOpcode() == ISD::FP_TO_SINT) &&
+ isOperationLegalOrCustom(Op.getOpcode(),
+ Op.getOperand(0).getValueType())) {
+
+ LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+ return true;
+ }
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
+ if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
+ LD->isNonTemporal())
+ return false;
+ if (LD->getMemoryVT() != MemVT)
+ return false;
+
+ RLI.Ptr = LD->getBasePtr();
+ if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
+ assert(LD->getAddressingMode() == ISD::PRE_INC &&
+ "Non-pre-inc AM on PPC?");
+ RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
+ LD->getOffset());
+ }
+
+ RLI.Chain = LD->getChain();
+ RLI.MPI = LD->getPointerInfo();
+ RLI.IsInvariant = LD->isInvariant();
+ RLI.Alignment = LD->getAlignment();
+ RLI.AAInfo = LD->getAAInfo();
+ RLI.Ranges = LD->getRanges();
+
+ RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
+ return true;
+}
+
+// Given the head of the old chain, ResChain, insert a token factor containing
+// it and NewResChain, and make users of ResChain now be users of that token
+// factor.
+void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
+ SDValue NewResChain,
+ SelectionDAG &DAG) const {
+ if (!ResChain)
+ return;
+
+ SDLoc dl(NewResChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ NewResChain, DAG.getUNDEF(MVT::Other));
+ assert(TF.getNode() != NewResChain.getNode() &&
+ "A new TF really is required here");
+
+ DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
+ DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
+}
+
+/// \brief Custom lowers integer to floating point conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert((Op.getValueType() == MVT::f32 ||
+ Op.getValueType() == MVT::f64) &&
+ "Invalid floating point type as target of conversion");
+ assert(Subtarget.hasFPCVT() &&
+ "Int to FP conversions with direct moves require FPCVT");
+ SDValue FP;
+ SDValue Src = Op.getOperand(0);
+ bool SinglePrec = Op.getValueType() == MVT::f32;
+ bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
+ unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
+ (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
+
+ if (WordInt) {
+ FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
+ dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+ else {
+ FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
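+ // (A full 64-bit source needs no extension, so the sign-extending move is
+ // used unconditionally here; the signed/unsigned distinction is carried by
+ // the fcfid variant selected in ConvOp above.)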
+
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+
+ if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
+ if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
+ return SDValue();
+
+ SDValue Value = Op.getOperand(0);
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs,
+ FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ if (Op.getValueType() != MVT::v4f64)
+ Value = DAG.getNode(ISD::FP_ROUND, dl,
+ Op.getValueType(), Value,
+ DAG.getIntPtrConstant(1, dl));
+ return Value;
+ }
+
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstantFP(1.0, dl, Op.getValueType()),
+ DAG.getConstantFP(0.0, dl, Op.getValueType()));
+
+ // If we have direct moves, we can do all the conversion, skip the store/load
+ // however, without FPCVT we can't do most conversions.
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+ return LowerINT_TO_FPDirectMove(Op, DAG, dl);
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue SINT = Op.getOperand(0);
+ // When converting to single-precision, we actually need to convert
+ // to double-precision first and then round to single-precision.
+ // To avoid double-rounding effects during that operation, we have
+ // to prepare the input operand. Bits that might be truncated when
+ // converting to double-precision are replaced by a bit that won't
+ // be lost at this stage, but is below the single-precision rounding
+ // position.
+ //
+ // However, if -enable-unsafe-fp-math is in effect, accept double
+ // rounding to avoid the extra overhead.
+ if (Op.getValueType() == MVT::f32 &&
+ !Subtarget.hasFPCVT() &&
+ !DAG.getTarget().Options.UnsafeFPMath) {
+
+ // Twiddle input to make sure the low 11 bits are zero. (If this
+ // is the case, we are guaranteed the value will fit into the 53 bit
+ // mantissa of an IEEE double-precision value without rounding.)
+ // If any of those low 11 bits were not zero originally, make sure
+ // bit 12 (value 2048) is set instead, so that the final rounding
+ // to single-precision gets the correct result.
+ SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ SINT, DAG.getConstant(2047, dl, MVT::i64));
+ Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Round, DAG.getConstant(2047, dl, MVT::i64));
+ Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+ Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ Round, DAG.getConstant(-2048, dl, MVT::i64));
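+ // In effect this builds a sticky bit: adding 2047 to the low 11 bits
+ // carries into bit 11 (value 2048) exactly when any of them were nonzero,
+ // and the final AND clears the low 11 bits again, so only that sticky bit
+ // survives alongside the untouched high-order bits.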
+
+ // However, we cannot use that value unconditionally: if the magnitude
+ // of the input value is small, the bit-twiddling we did above might
+ // end up visibly changing the output. Fortunately, in that case, we
+ // don't need to twiddle bits since the original input will convert
+ // exactly to double-precision floating-point already. Therefore,
+ // construct a conditional to use the original value if the top 11
+ // bits are all sign-bit copies, and use the rounded value computed
+ // above otherwise.
+ SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+ SINT, DAG.getConstant(53, dl, MVT::i32));
+ Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Cond, DAG.getConstant(1, dl, MVT::i64));
+ Cond = DAG.getSetCC(dl, MVT::i32,
+ Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
+
+ SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+ }
+
+ ReuseLoadInfo RLI;
+ SDValue Bits;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
+ Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+ false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+ RLI.Ranges);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (Subtarget.hasLFIWAX() &&
+ canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (Subtarget.hasFPCVT() &&
+ canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (((Subtarget.hasLFIWAX() &&
+ SINT.getOpcode() == ISD::SIGN_EXTEND) ||
+ (Subtarget.hasFPCVT() &&
+ SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
+ SINT.getOperand(0).getValueType() == MVT::i32) {
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+
+ RLI.Ptr = FIdx;
+ RLI.Chain = Store;
+ RLI.MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ RLI.Alignment = 4;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ } else
+ Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled INT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
+ // then lfd it and fcfid it.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+
+ SDValue Ld;
+ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
+ ReuseLoadInfo RLI;
+ bool ReusingLoad;
+ if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
+ DAG))) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+
+ RLI.Ptr = FIdx;
+ RLI.Chain = Store;
+ RLI.MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ RLI.Alignment = 4;
+ }
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ if (ReusingLoad)
+ spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
+ } else {
+ assert(Subtarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(
+ MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, false, 0);
+ }
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
+ DAG.getIntPtrConstant(0, dl));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ /*
+ The rounding mode is in bits 30:31 of FPSCR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
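+
+ As a quick check, the four FPSCR settings map as expected:
+ 00 -> (0 ^ (3 >> 1)) = 1 (round to nearest)
+ 01 -> (1 ^ (2 >> 1)) = 0 (round to 0)
+ 10 -> (2 ^ (1 >> 1)) = 2 (round to +inf)
+ 11 -> (3 ^ (0 >> 1)) = 3 (round to -inf)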
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+
+ // Save FP Control Word to register
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, MachinePointerInfo(), false, false,0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, dl, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, dl, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, dl, MVT::i32)),
+ DAG.getConstant(3, dl, MVT::i32)),
+ DAG.getConstant(1, dl, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ SDLoc dl(Op);
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
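+ // (On PPC the variable shifts take their amount from a field one bit wider
+ // than log2(BitWidth), so amounts of BitWidth..2*BitWidth-1 produce zero for
+ // logical shifts; that is what makes the Tmp3/Tmp6 terms below self-masking.
+ // Illustrative case with BitWidth == 32 and Amt == 40: Tmp2 = Hi << 40 = 0,
+ // Tmp3 = Lo >> -8 = 0 (the negative amount wraps to >= 32), Tmp6 = Lo << 8,
+ // so OutHi = Lo << 8 and OutLo = Lo << 40 = 0, as expected.)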
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, dl, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, dl, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
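+ // The select_cc at the end picks the correct low part: when Amt <= BitWidth
+ // (i.e. Tmp5 <= 0) it is (Lo >> Amt) | (Hi << (BitWidth - Amt)); otherwise it
+ // is the oversized-shift term Hi >>a (Amt - BitWidth).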
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, dl, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, dl, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const MVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ EVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, dl, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
+ SelectionDAG &DAG, SDLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, dl, MVT::i32), Op);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, SDLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ SDLoc dl, EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
+}
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
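+/// For example, with LHS == RHS and Amt == 4, result byte i is input byte
+/// (i + 4) mod 16, i.e. a rotation by one 32-bit element.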
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ EVT VT, SelectionDAG &DAG, SDLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
+ // We first build an i32 vector, load it into a QPX register,
+ // then convert it to a floating-point vector and compare it
+ // to a zero vector to get the boolean result.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ assert(BVN->getNumOperands() == 4 &&
+ "BUILD_VECTOR for v4i1 does not have 4 operands");
+
+ bool IsConst = true;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
+ IsConst = false;
+ break;
+ }
+ }
+
+ if (IsConst) {
+ Constant *One =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
+ Constant *NegOne =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
+
+ SmallVector<Constant*, 4> CV(4, NegOne);
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
+ else if (isNullConstant(BVN->getOperand(i)))
+ continue;
+ else
+ CV[i] = One;
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
+ 16 /* alignment */);
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4i1);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ return DAG.getMemIntrinsicNode(
+ PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ }
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
+ if (StoreSize > 4) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ BVN->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ MVT::i32, false, false, 0));
+ } else {
+ SDValue StoreValue = BVN->getOperand(i);
+ if (StoreSize < 4)
+ StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ StoreValue, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty())
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Now load from v4i32 into the QPX register; this will extend it to
+ // v4i64 but not yet convert it to floating point. Nevertheless, this
+ // is typed as v4f64 because the QPX register integer states are not
+ // explicitly represented.
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32));
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4f64);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+ LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
+ LoadedVect);
+
+ SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64);
+ FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPZeros, FPZeros, FPZeros, FPZeros);
+
+ return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
+ }
+
+ // All other QPX vectors are handled by generic code.
+ if (Subtarget.hasQPX())
+ return SDValue();
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, dl, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
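+ // For example, a v8i16 splat of 0xFFF4 sign-extends to -12, so a single
+ // vspltish -12 suffices.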
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
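+ // For example, a byte splat of 27 falls in [17,31]: vspltisb 11 followed by
+ // a subtract of vspltisb -16 yields 11 - (-16) == 27 in each element.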
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
+ EVT VT = (SplatSize == 1 ? MVT::v16i8 :
+ (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
+ SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
+ SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ if (VT == Op.getValueType())
+ return RetVal;
+ else
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
+ }
+
+ // If this is 0x7FFF_FFFF x 4, turn it into not(0x8000_0000), building
+ // 0x8000_0000 itself with vspltisw + vslw. This is important for
+ // fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make -1 and vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000).
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
+ return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
+ return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
+ return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ SDLoc dl) {
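+ // Each perfect-shuffle table entry packs a cost in bits 31:30, the operation
+ // number in bits 29:26, and 13-bit IDs of the left and right sub-shuffles.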
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ EVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+
+ if (Subtarget.hasQPX()) {
+ if (VT.getVectorNumElements() != 4)
+ return SDValue();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
+ if (AlignIdx != -1) {
+ return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
+ DAG.getConstant(AlignIdx, dl, MVT::i32));
+ } else if (SVOp->isSplat()) {
+ int SplatIdx = SVOp->getSplatIndex();
+ if (SplatIdx >= 4) {
+ std::swap(V1, V2);
+ SplatIdx -= 4;
+ }
+
+ // FIXME: If SplatIdx == 0 and the input came from a load, then there is
+ // nothing to do.
+
+ return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
+ DAG.getConstant(SplatIdx, dl, MVT::i32));
+ }
+
+ // Lower this into a qvgpci/qvfperm pair.
+
+ // Compute the qvgpci literal
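+ // Each of the four mask elements becomes a 3-bit field, element 0 landing in
+ // the most-significant field; e.g. the mask <2,0,1,3> encodes as
+ // (2 << 9) | (0 << 6) | (1 << 3) | 3 == 1035.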
+ unsigned idx = 0;
+ for (unsigned i = 0; i < 4; ++i) {
+ int m = SVOp->getMaskElt(i);
+ unsigned mm = m >= 0 ? (unsigned) m : i;
+ idx |= mm << (3-i)*3;
+ }
+
+ SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
+ DAG.getConstant(idx, dl, MVT::i32));
+ return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
+ }
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
+ (Subtarget.hasP8Altivec() && (
+ PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
+ if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ (Subtarget.hasP8Altivec() && (
+ PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
+ PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ ArrayRef<int> PermMask = SVOp->getMask();
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle && !isLittleEndian) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+
+ // For little endian, the order of the input vectors is reversed, and
+ // the permutation mask is complemented with respect to 31. This is
+ // necessary to produce proper semantics with the big-endian-biased vperm
+ // instruction.
+ EVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ if (isLittleEndian)
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
+ dl, MVT::i32));
+ else
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
+ MVT::i32));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ ResultMask);
+ if (isLittleEndian)
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V2, V1, VPermMask);
+ else
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V1, V2, VPermMask);
+}
+
+/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
+/// vector comparison. If it is, return true and fill in Opc/isDot with
+/// information about the intrinsic.
+static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot, const PPCSubtarget &Subtarget) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 1;
+ } else
+ return false;
+
+ break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 1;
+ } else
+ return false;
+
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 1;
+ } else
+ return false;
+
+ break;
+ // VSX predicate comparisons use the same infrastructure
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p:
+ case Intrinsic::ppc_vsx_xvcmpgedp_p:
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p:
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p:
+ case Intrinsic::ppc_vsx_xvcmpgesp_p:
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p:
+ if (Subtarget.hasVSX()) {
+ switch (IntrinsicID) {
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
+ case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
+ case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
+ }
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 0;
+ } else
+ return false;
+
+ break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 0;
+ } else
+ return false;
+
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtud:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 711;
+ isDot = 0;
+ } else
+ return false;
+
+ break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ SDLoc dl(Op);
+ int CompareOpc;
+ bool isDot;
+ if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
+ }
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, dl, MVT::i32)
+ };
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
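+ // After the MFOCRF copy, CR6 sits in bits 7..4 of the i32 when counted from
+ // the least-significant end (LT highest, SO lowest), so a right shift by
+ // 8 - (3 - BitNo) brings the requested bit down to bit 0.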
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, dl, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, dl, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
+ // instructions), but for smaller types, we need to first extend up to v2i32
+ // before going any further.
+ if (Op.getValueType() == MVT::v2i64) {
+ EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (ExtVT != MVT::v2i32) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
+ DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
+ ExtVT.getVectorElementType(), 4)));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
+ DAG.getValueType(MVT::v2i32));
+ }
+
+ return Op;
+ }
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, MachinePointerInfo(),
+ false, false, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDNode *N = Op.getNode();
+
+ assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
+ "Unknown extract_vector_elt type");
+
+ SDValue Value = N->getOperand(0);
+
+ // The first part of this is like the store lowering except that we don't
+ // need to track the chain.
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue StoreChain = DAG.getEntryNode();
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
+ Ops.push_back(Value);
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Extract the value requested.
+ unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, false, 0);
+
+ if (!Subtarget.useCRBits())
+ return IntVal;
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
+}
+
+/// Lowering for QPX v4i1 loads
+SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ SDValue LoadChain = LN->getChain();
+ SDValue BasePtr = LN->getBasePtr();
+
+ if (Op.getValueType() == MVT::v4f64 ||
+ Op.getValueType() == MVT::v4f32) {
+ EVT MemVT = LN->getMemoryVT();
+ unsigned Alignment = LN->getAlignment();
+
+ // If this load is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Op.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
+ SmallVector<SDValue, 8> Vals, LoadChains;
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Load;
+ if (ScalarVT != ScalarMemVT)
+ Load =
+ DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
+ BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx*Stride),
+ ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+ LN->getAAInfo());
+ else
+ Load =
+ DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
+ LN->getPointerInfo().getWithOffset(Idx*Stride),
+ LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+ LN->getAAInfo());
+
+ if (Idx == 0 && LN->isIndexed()) {
+ assert(LN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector load");
+ Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
+ LN->getAddressingMode());
+ }
+
+ Vals.push_back(Load);
+ LoadChains.push_back(Load.getValue(1));
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getValueType(), Vals);
+
+ if (LN->isIndexed()) {
+ SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ SDValue RetOps[] = { Value, TF };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
+ assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
+
+ // To lower v4i1 from a byte array, we load the byte elements of the
+ // vector and then reuse the BUILD_VECTOR logic.
+
+ SmallVector<SDValue, 4> VectElmts, VectElmtChains;
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
+ dl, MVT::i32, LoadChain, Idx,
+ LN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */,
+ LN->isVolatile(), LN->isNonTemporal(),
+ LN->isInvariant(),
+ 1 /* alignment */, LN->getAAInfo()));
+ VectElmtChains.push_back(VectElmts[i].getValue(1));
+ }
+
+ LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
+
+ SDValue RVals[] = { Value, LoadChain };
+ return DAG.getMergeValues(RVals, dl);
+}
+
+/// Lowering for QPX v4i1 stores
+SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ SDValue StoreChain = SN->getChain();
+ SDValue BasePtr = SN->getBasePtr();
+ SDValue Value = SN->getValue();
+
+ if (Value.getValueType() == MVT::v4f64 ||
+ Value.getValueType() == MVT::v4f32) {
+ EVT MemVT = SN->getMemoryVT();
+ unsigned Alignment = SN->getAlignment();
+
+ // If this store is properly aligned, then it is legal.
+ if (Alignment >= MemVT.getStoreSize())
+ return Op;
+
+ EVT ScalarVT = Value.getValueType().getScalarType(),
+ ScalarMemVT = MemVT.getScalarType();
+ unsigned Stride = ScalarMemVT.getStoreSize();
+
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < 4; ++Idx) {
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Store;
+ if (ScalarVT != ScalarMemVT)
+ Store =
+ DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx*Stride),
+ ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
+ MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+ else
+ Store =
+ DAG.getStore(StoreChain, dl, Ex, BasePtr,
+ SN->getPointerInfo().getWithOffset(Idx*Stride),
+ SN->isVolatile(), SN->isNonTemporal(),
+ MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+
+ if (Idx == 0 && SN->isIndexed()) {
+ assert(SN->getAddressingMode() == ISD::PRE_INC &&
+ "Unknown addressing mode on vector store");
+ Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
+ SN->getAddressingMode());
+ }
+
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, dl,
+ BasePtr.getValueType()));
+ Stores.push_back(Store);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ if (SN->isIndexed()) {
+ SDValue RetOps[] = { TF, Stores[0].getValue(1) };
+ return DAG.getMergeValues(RetOps, dl);
+ }
+
+ return TF;
+ }
+
+ assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
+ assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
+
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+ // understand how to form the extending load.
+ SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ // Now convert to an integer and store.
+ Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
+ Value);
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
+ Ops.push_back(Value);
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+ // Move data into the byte array.
+ SmallVector<SDValue, 4> Loads, LoadChains;
+ for (unsigned i = 0; i < 4; ++i) {
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, false, 0));
+ LoadChains.push_back(Loads[i].getValue(1));
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+ Stores.push_back(DAG.getTruncStore(
+ StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(),
+ 1 /* alignment */, SN->getAAInfo()));
+ }
+
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ return StoreChain;
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
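+ // For each 32-bit element, a*b mod 2^32 ==
+ //   lo16(a)*lo16(b) + ((lo16(a)*hi16(b) + hi16(a)*lo16(b)) << 16),
+ // and the vmulouh/vmsumuhm/vslw sequence below computes exactly these terms.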
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ bool isLittleEndian = Subtarget.isLittleEndian();
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together. Because vmuleub and vmuloub are
+ // instructions with a big-endian bias, we must reverse the
+ // element numbering and reverse the meaning of "odd" and "even"
+ // when generating little endian code.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ if (isLittleEndian) {
+ Ops[i*2 ] = 2*i;
+ Ops[i*2+1] = 2*i+16;
+ } else {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
+ }
+ if (isLittleEndian)
+ return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
+ else
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ llvm_unreachable("Unknown mul to lower!");
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, Subtarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, Subtarget);
+
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG, Subtarget);
+
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget);
+
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
+ SDLoc(Op));
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // For counter-based loop handling.
+ case ISD::INTRINSIC_W_CHAIN: return SDValue();
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::READCYCLECOUNTER: {
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
+
+ Results.push_back(RTB);
+ Results.push_back(RTB.getValue(1));
+ Results.push_back(RTB.getValue(2));
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
+ Intrinsic::ppc_is_decremented_ctr_nonzero)
+ break;
+
+ assert(N->getValueType(0) == MVT::i1 &&
+ "Unexpected result type for CTR decrement intrinsic");
+ EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
+ SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
+ N->getOperand(1));
+
+ Results.push_back(NewInt);
+ Results.push_back(NewInt.getValue(1));
+ break;
+ }
+ case ISD::VAARG: {
+ if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
+ return;
+ }
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1, dl));
+
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ // LowerFP_TO_INT() can only handle f32 and f64.
+ if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ return;
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *Func = Intrinsic::getDeclaration(M, Id);
+ return Builder.CreateCall(Func, {});
+}
+
+// The mappings for emitLeadingFence/emitTrailingFence are taken from
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (Ord == SequentiallyConsistent)
+ return callIntrinsic(Builder, Intrinsic::ppc_sync);
+ if (isAtLeastRelease(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ return nullptr;
+}
+
+Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (IsLoad && isAtLeastAcquire(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ // FIXME: this is too conservative, a dependent branch + isync is enough.
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ return nullptr;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned AtomicSize,
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch (AtomicSize) {
+ default:
+ llvm_unreachable("Unexpected size of atomic entity");
+ case 1:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 2:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 4:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case 8:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
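+  // The larx/stcx. pair is chosen to match the width of the atomic access:
+  // lbarx/stbcx. for bytes, lharx/sthcx. for halfwords, lwarx/stwcx. for
+  // words, and ldarx/stdcx. for doublewords.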
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass);
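+  // With BinOpcode == 0 (ATOMIC_SWAP) the incoming value is stored directly,
+  // so no temporary result register is needed.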
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // add r0, dest, incr
+ // st[wd]cx. r0, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode) const {
+ // If we support part-word atomic mnemonics, just use them
+ if (Subtarget.hasPartwordAtomics())
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  // In 64-bit mode we have to use 64-bit registers for addresses, even though
+  // lwarx/stwcx. only operate on 32-bit words. With the 32-bit atomics we can
+  // use address registers without caring whether they're 32 or 64 bits wide,
+  // but here we're doing actual arithmetic on the addresses.
+ bool is64bit = Subtarget.isPPC64();
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
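+  // At this point Incr2Reg holds the increment and MaskReg the 0xFF/0xFFFF
+  // mask, both shifted into the byte/halfword's position within the aligned
+  // word addressed by PtrReg.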
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+ .addReg(ShiftReg);
+ return BB;
+}
+
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = ++MBB->getIterator();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy(MF->getDataLayout());
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
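+  // In pointer-sized slots, the buffer layout used below is therefore:
+  //   [0] frame address   [1] jump (return) address   [2] stack address
+  //   [3] TOC pointer     [4] base pointer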
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
+
+  // Prepare the jump target address (IP) in a register.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Naked functions never have a base pointer, and so we use r1. For all
+  // other functions, this decision must be delayed until PEI.
+ unsigned BaseReg;
+ if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ MIB.addRegMask(TRI->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
+ thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB =
+ BuildMI(mainMBB, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (Subtarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy(MF->getDataLayout());
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+ unsigned BP =
+ (PVT == MVT::i64)
+ ? PPC::X30
+ : (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_
+ ? PPC::R29
+ : PPC::R30);
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload BP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload TOC
+ if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == TargetOpcode::STACKMAP ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ // Call lowering should have added an r2 operand to indicate a dependence
+      // on the TOC base pointer value. It can't, however, because there is no
+ // way to mark the dependence as implicit there, and so the stackmap code
+ // will confuse it with a regular operand. Instead, add the dependence
+ // here.
+ setUsesTOCBasePtr(*BB->getParent());
+ MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
+ }
+
+ return emitPatchPoint(MI, BB);
+ }
+
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+
+ if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
+ SmallVector<MachineOperand, 2> Cond;
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)
+ Cond.push_back(MI->getOperand(4));
+ else
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(MI->getOperand(1));
+
+ DebugLoc dl = MI->getDebugLoc();
+ TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+ Cond, MI->getOperand(2).getReg(),
+ MI->getOperand(3).getReg());
+ } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_QFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QSRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QBRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSSRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSRC ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSSRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ if (MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSSRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
+ BuildMI(BB, dl, TII->get(PPC::BC))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ } else {
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ }
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ } else if (MI->getOpcode() == PPC::ReadTB) {
+ // To read the 64-bit time-base register on a 32-bit target, we read the
+ // two halves. Should the counter have wrapped while it was being read, we
+ // need to try again.
+ // ...
+ // readLoop:
+ // mfspr Rx,TBU # load from TBU
+ // mfspr Ry,TB # load from TB
+ // mfspr Rz,TBU # load from TBU
+ // cmpw crX,Rx,Rz # check if 'old'='new'
+ // bne readLoop # branch if they're not equal
+ // ...
+
+ MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, readMBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(readMBB);
+ BB = readMBB;
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned LoReg = MI->getOperand(0).getReg();
+ unsigned HiReg = MI->getOperand(1).getReg();
+
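+    // SPR 269 is TBU (time base upper) and SPR 268 the time base lower half;
+    // re-reading TBU and comparing against the first read detects a wrap of
+    // the low half during the sequence.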
+ BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
+ BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
+ BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
+
+ unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
+ .addReg(HiReg).addReg(ReadAgainReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
+
+ BB->addSuccessor(readMBB);
+ BB->addSuccessor(sinkMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, 4, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, 8, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch(MI->getOpcode()) {
+ default:
+ llvm_unreachable("Compare and swap of unknown size");
+ case PPC::ATOMIC_CMP_SWAP_I8:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I16:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I32:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I64:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[bhwd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[bhwd]cx. newval, ptr
+ // bne- loopMBB
+ // b exitBB
+ // midMBB:
+ // st[bhwd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = Subtarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+ .addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
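+    // FPSCR bits 30-31 form the RN (rounding mode) field; setting bit 31 and
+    // clearing bit 30 selects RN=0b01, round toward zero.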
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+ unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+ PPC::ANDIo8 : PPC::ANDIo;
+ bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+ &PPC::GPRCRegClass :
+ &PPC::G8RCRegClass);
+
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ .addReg(MI->getOperand(1).getReg()).addImm(1);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+ DebugLoc Dl = MI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+ return BB;
+ } else {
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
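+// Build the reciprocal-estimate option key (e.g. "sqrtf", "vec-sqrtd",
+// "divf") checked against TargetRecip below, by appending a scalar-type
+// suffix and, for vectors, a "vec-" prefix.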
+static std::string getRecipOp(const char *Base, EVT VT) {
+ std::string RecipOp(Base);
+ if (VT.getScalarType() == MVT::f64)
+ RecipOp += "d";
+ else
+ RecipOp += "f";
+
+ if (VT.isVector())
+ RecipOp = "vec-" + RecipOp;
+
+ return RecipOp;
+}
+
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("sqrt", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ UseOneConstNR = true;
+ return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
+ TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+ std::string RecipOp = getRecipOp("div", VT);
+ if (!Recips.isEnabled(RecipOp))
+ return SDValue();
+
+ RefinementSteps = Recips.getRefinementSteps(RecipOp);
+ return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+ }
+ return SDValue();
+}
+
+unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
+ // Note: This functionality is used only when unsafe-fp-math is enabled, and
+ // on cores with reciprocal estimates (which are used when unsafe-fp-math is
+ // enabled for division), this functionality is redundant with the default
+ // combiner logic (once the division -> reciprocal/multiply transformation
+ // has taken place). As a result, this matters more for older cores than for
+ // newer ones.
+
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal if there are two or more FDIVs (for embedded cores with only
+  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
+ switch (Subtarget.getDarwinDirective()) {
+ default:
+ return 3;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ return 2;
+ }
+}
+
+// isConsecutiveLSLoc needs to work even if all adds have not yet been
+// collapsed, and so we need to look through chains of them.
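+// For example, (add (add X, 8), 16) yields Base == X and Offset += 24.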
+static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
+ int64_t& Offset, SelectionDAG &DAG) {
+ if (DAG.isBaseWithConstantOffset(Loc)) {
+ Base = Loc.getOperand(0);
+ Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+
+ // The base might itself be a base plus an offset, and if so, accumulate
+ // that as well.
+ getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
+ }
+}
+
+static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue BaseLoc = Base->getBasePtr();
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ SDValue Base1 = Loc, Base2 = BaseLoc;
+ int64_t Offset1 = 0, Offset2 = 0;
+ getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
+ getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
+ if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
+ return true;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
+ Offset1 = 0;
+ Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
+ EVT VT = LS->getMemoryVT();
+ SDValue Loc = LS->getBasePtr();
+ return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_vsx_lxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_VOID) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfiw:
+ case Intrinsic::ppc_qpx_qvstfiwa:
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_vsx_stxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
+ }
+
+ return false;
+}
+
+// Return true if there is a nearby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking through
+// token factors and other loads (but nothing else). As a result, a true result
+// indicates that it is safe to create a new consecutive load adjacent to the
+// load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+ SDValue Chain = LD->getChain();
+ EVT VT = LD->getMemoryVT();
+
+ SmallSet<SDNode *, 16> LoadRoots;
+ SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+ SmallSet<SDNode *, 16> Visited;
+
+ // First, search up the chain, branching to follow all token-factor operands.
+ // If we find a consecutive load, then we're done, otherwise, record all
+ // nodes just above the top-level loads and token factors.
+ while (!Queue.empty()) {
+ SDNode *ChainNext = Queue.pop_back_val();
+ if (!Visited.insert(ChainNext).second)
+ continue;
+
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ if (!Visited.count(ChainLD->getChain().getNode()))
+ Queue.push_back(ChainLD->getChain().getNode());
+ } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+ for (const SDUse &O : ChainNext->ops())
+ if (!Visited.count(O.getNode()))
+ Queue.push_back(O.getNode());
+ } else
+ LoadRoots.insert(ChainNext);
+ }
+
+ // Second, search down the chain, starting from the top-level nodes recorded
+ // in the first phase. These top-level nodes are the nodes just above all
+  // loads and token factors. Starting with their uses, recursively look through
+ // all loads (just the chain uses) and token factors to find a consecutive
+ // load.
+ Visited.clear();
+ Queue.clear();
+
+ for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+ IE = LoadRoots.end(); I != IE; ++I) {
+ Queue.push_back(*I);
+
+ while (!Queue.empty()) {
+ SDNode *LoadRoot = Queue.pop_back_val();
+ if (!Visited.insert(LoadRoot).second)
+ continue;
+
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+ UE = LoadRoot->use_end(); UI != UE; ++UI)
+ if (((isa<MemSDNode>(*UI) &&
+ cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+ Queue.push_back(*UI);
+ }
+ }
+
+ return false;
+}
+
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // trunc(binary-ops(zext(x), zext(y)))
+ // or
+  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
+ // such that we're unnecessarily moving things into GPRs when it would be
+ // better to keep them in CR bits.
+
+ // Note that trunc here can be an actual i1 trunc, or can be the effective
+ // truncation that comes from a setcc or select_cc.
+ if (N->getOpcode() == ISD::TRUNCATE &&
+ N->getValueType(0) != MVT::i1)
+ return SDValue();
+
+ if (N->getOperand(0).getValueType() != MVT::i32 &&
+ N->getOperand(0).getValueType() != MVT::i64)
+ return SDValue();
+
+ if (N->getOpcode() == ISD::SETCC ||
+ N->getOpcode() == ISD::SELECT_CC) {
+ // If we're looking at a comparison, then we need to make sure that the
+    // high bits (all except for the first) don't affect the result.
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(
+ N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+ unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+ if (ISD::isSignedIntSetCC(CC)) {
+ if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+ DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+ return SDValue();
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (!DAG.MaskedValueIsZero(N->getOperand(0),
+ APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+ !DAG.MaskedValueIsZero(N->getOperand(1),
+ APInt::getHighBitsSet(OpBits, OpBits-1)))
+ return SDValue();
+ } else {
+ // This is neither a signed nor an unsigned comparison, just make sure
+ // that the high bits are equal.
+ APInt Op1Zero, Op1One;
+ APInt Op2Zero, Op2One;
+ DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
+ DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
+
+ // We don't really care about what is known about the first bit (if
+ // anything), so clear it in all masks prior to comparing them.
+ Op1Zero.clearBit(0); Op1One.clearBit(0);
+ Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+ if (Op1Zero != Op2Zero || Op1One != Op2One)
+ return SDValue();
+ }
+ }
+
+  // We now know that the higher-order bits are irrelevant; we just need to
+ // make sure that all of the intermediate operations are bit operations, and
+ // all inputs are extensions.
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+ N->getOperand(1).getOpcode() != ISD::AND &&
+ N->getOperand(1).getOpcode() != ISD::OR &&
+ N->getOperand(1).getOpcode() != ISD::XOR &&
+ N->getOperand(1).getOpcode() != ISD::SELECT &&
+ N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps, PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ for (unsigned i = 0; i < 2; ++i) {
+ if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(N->getOperand(i)))
+ Inputs.push_back(N->getOperand(i));
+ else
+ BinOps.push_back(N->getOperand(i));
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ break;
+ }
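+  // For a TRUNCATE only operand 0 was examined above; SETCC and SELECT_CC
+  // compare two values, so both operands were classified.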
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by extensions.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()).second)
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+ BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not an extension or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ // Replace all inputs with the extension operand.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constants may have users outside the cluster of to-be-promoted nodes,
+ // and so we need to replace those as we do the promotions.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (i1) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first. Any intermediate truncations or
+ // extensions disappear.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ if (PromOp.getOpcode() == ISD::TRUNCATE ||
+ PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+ PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+ PromOp.getOpcode() == ISD::ANY_EXTEND) {
+ if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+ PromOp.getOperand(0).getValueType() != MVT::i1) {
+ // The operand is not yet ready (see comment below).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SDValue RepValue = PromOp.getOperand(0);
+ if (isa<ConstantSDNode>(RepValue))
+ RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+ continue;
+ }
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
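+    // C is the index of the first value operand to re-emit: 0 for plain
+    // binary ops, 1 to skip the SELECT condition, 2 to skip the two
+    // SELECT_CC comparison operands.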
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != MVT::i1) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If there are any constant inputs, make sure they're replaced now.
+ for (unsigned i = 0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[C+i]))
+ Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
+ }
+
+ // Now we're left with the initial truncation itself.
+ if (N->getOpcode() == ISD::TRUNCATE)
+ return N->getOperand(0);
+
+ // Otherwise, this is a comparison. The operands to be compared have just
+ // changed type (to i1), but everything else is the same.
+ return SDValue(N, 0);
+}
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // zext(binary-ops(trunc(x), trunc(y)))
+ // or
+ // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
+ // such that we're unnecessarily moving things into CR bits that can more
+ // efficiently stay in GPRs. Note that if we're not certain that the high
+ // bits are set as required by the final extension, we still may need to do
+ // some masking to get the proper behavior.
+
+ // This same functionality is important on PPC64 when dealing with
+ // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+ // the return values of functions. Because it is so similar, it is handled
+ // here as well.
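+ // For example, (zext (xor (trunc i32 %a to i1), (trunc i32 %b to i1)) to i32)
+ // can instead be computed as (xor %a, %b) directly in a GPR, with a mask or
+ // shift pair added afterwards only if the high bits must match the extension.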
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by truncations.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()).second)
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not a truncation or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // The operands of a select that must be truncated when the select is
+ // promoted because the operand is actually part of the to-be-promoted set.
+ DenseMap<SDNode *, EVT> SelectTruncOp[2];
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // If we're going to promote the non-output-value operand(s) of SELECT or
+ // SELECT_CC, record them for truncation.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i])
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
+ if (User->getOperand(1) == Inputs[i])
+ SelectTruncOp[1].insert(std::make_pair(User,
+ User->getOperand(1).getValueType()));
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // If we're going to promote the non-output-value operand(s) of SELECT or
+ // SELECT_CC, record them for truncation.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i])
+ SelectTruncOp[0].insert(std::make_pair(User,
+ User->getOperand(0).getValueType()));
+ if (User->getOperand(1) == PromOps[i])
+ SelectTruncOp[1].insert(std::make_pair(User,
+ User->getOperand(1).getValueType()));
+ }
+ }
+ }
+
+ unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+ bool ReallyNeedsExt = false;
+ if (N->getOpcode() != ISD::ANY_EXTEND) {
+ // If all of the inputs are not already sign/zero extended, then
+ // we'll still need to do that at the end.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ unsigned OpBits =
+ Inputs[i].getOperand(0).getValueSizeInBits();
+ assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+ if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+ !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+ APInt::getHighBitsSet(OpBits,
+ OpBits-PromBits))) ||
+ (N->getOpcode() == ISD::SIGN_EXTEND &&
+ DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+ (OpBits-(PromBits-1)))) {
+ ReallyNeedsExt = true;
+ break;
+ }
+ }
+ }
+
+ // Replace all inputs, either with the truncation operand, or a
+ // truncation or extension to the final output type.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constant inputs need to be replaced with the to-be-promoted nodes that
+ // use them because they might have users outside of the cluster of
+ // promoted nodes.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ SDValue InSrc = Inputs[i].getOperand(0);
+ if (Inputs[i].getValueType() == N->getValueType(0))
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+ else if (N->getOpcode() == ISD::SIGN_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (promoted) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ // For SELECT and SELECT_CC nodes, we do a similar check for any
+ // to-be-promoted comparison inputs.
+ if (PromOp.getOpcode() == ISD::SELECT ||
+ PromOp.getOpcode() == ISD::SELECT_CC) {
+ if ((SelectTruncOp[0].count(PromOp.getNode()) &&
+ PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
+ (SelectTruncOp[1].count(PromOp.getNode()) &&
+ PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If this node has constant inputs, then they'll need to be promoted here.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!isa<ConstantSDNode>(Ops[C+i]))
+ continue;
+ if (Ops[C+i].getValueType() == N->getValueType(0))
+ continue;
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else
+ Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ }
+
+ // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
+ // truncate them again to the original value type.
+ if (PromOp.getOpcode() == ISD::SELECT ||
+ PromOp.getOpcode() == ISD::SELECT_CC) {
+ auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
+ if (SI0 != SelectTruncOp[0].end())
+ Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
+ auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
+ if (SI1 != SelectTruncOp[1].end())
+ Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
+ }
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
+ }
+
+ // Now we're left with the initial extension itself.
+ if (!ReallyNeedsExt)
+ return N->getOperand(0);
+
+ // To zero extend, just mask off everything except for the first bit (in the
+ // i1 case).
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+ DAG.getConstant(APInt::getLowBitsSet(
+ N->getValueSizeInBits(0), PromBits),
+ dl, N->getValueType(0)));
+
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ "Invalid extension type");
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
+ SDValue ShiftCst =
+ DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
+ return DAG.getNode(
+ ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
+ ShiftCst);
+}
+
+SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::UINT_TO_FP) &&
+ "Need an int -> FP conversion node here");
+
+ if (!Subtarget.has64BitSupport())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Op(N, 0);
+
+ // Don't handle ppc_fp128 here or i1 conversions.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return SDValue();
+
+ // For i32 intermediate values, unfortunately, the conversion functions
+ // leave the upper 32 bits of the value undefined. Within the set of
+ // scalar instructions, we have no method for zero- or sign-extending the
+ // value. Thus, we cannot handle i32 intermediate values here.
+ if (Op.getOperand(0).getValueType() == MVT::i32)
+ return SDValue();
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
+
+ // If we're converting from a float, to an int, and back to a float again,
+ // then we don't need the store/load pair at all.
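+ // For example, (f64 (sint_to_fp (i64 (fp_to_sint f64 %x)))) becomes
+ // fcfid (fctidz %x), keeping the value in floating-point registers throughout.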
+ if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
+ Subtarget.hasFPCVT()) ||
+ (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
+ SDValue Src = Op.getOperand(0).getOperand(0);
+ if (Src.getValueType() == MVT::f32) {
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+ DCI.AddToWorklist(Src.getNode());
+ } else if (Src.getValueType() != MVT::f64) {
+ // Make sure that we don't pick up a ppc_fp128 source value.
+ return SDValue();
+ }
+
+ unsigned FCTOp =
+ Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ;
+
+ SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
+
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
+ DCI.AddToWorklist(FP.getNode());
+ }
+
+ return FP;
+ }
+
+ return SDValue();
+}
+
+// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
+// builtins) into loads with swaps.
+SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Chain;
+ SDValue Base;
+ MachineMemOperand *MMO;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode for little endian VSX load");
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ Chain = LD->getChain();
+ Base = LD->getBasePtr();
+ MMO = LD->getMemOperand();
+ // If the MMO suggests this isn't a load of a full vector, leave
+ // things alone. For a built-in, we have to make the change for
+ // correctness, so if there is a size problem, that will be a bug.
+ if (MMO->getSize() < 16)
+ return SDValue();
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
+ Chain = Intrin->getChain();
+ // Similarly to the store case below, Intrin->getBasePtr() doesn't get
+ // us what we want. Get operand 2 instead.
+ Base = Intrin->getOperand(2);
+ MMO = Intrin->getMemOperand();
+ break;
+ }
+ }
+
+ MVT VecTy = N->getValueType(0).getSimpleVT();
+ SDValue LoadOps[] = { Chain, Base };
+ SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
+ DAG.getVTList(VecTy, MVT::Other),
+ LoadOps, VecTy, MMO);
+ DCI.AddToWorklist(Load.getNode());
+ Chain = Load.getValue(1);
+ SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
+ DAG.getVTList(VecTy, MVT::Other), Chain, Load);
+ DCI.AddToWorklist(Swap.getNode());
+ return Swap;
+}
+
+// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
+// builtins) into stores with swaps.
+SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Chain;
+ SDValue Base;
+ unsigned SrcOpnd;
+ MachineMemOperand *MMO;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode for little endian VSX store");
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ Chain = ST->getChain();
+ Base = ST->getBasePtr();
+ MMO = ST->getMemOperand();
+ SrcOpnd = 1;
+ // If the MMO suggests this isn't a store of a full vector, leave
+ // things alone. For a built-in, we have to make the change for
+ // correctness, so if there is a size problem, that will be a bug.
+ if (MMO->getSize() < 16)
+ return SDValue();
+ break;
+ }
+ case ISD::INTRINSIC_VOID: {
+ MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
+ Chain = Intrin->getChain();
+ // Intrin->getBasePtr() oddly does not get what we want.
+ Base = Intrin->getOperand(3);
+ MMO = Intrin->getMemOperand();
+ SrcOpnd = 2;
+ break;
+ }
+ }
+
+ SDValue Src = N->getOperand(SrcOpnd);
+ MVT VecTy = Src.getValueType().getSimpleVT();
+ SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
+ DAG.getVTList(VecTy, MVT::Other), Chain, Src);
+ DCI.AddToWorklist(Swap.getNode());
+ Chain = Swap.getValue(1);
+ SDValue StoreOps[] = { Chain, Swap, Base };
+ SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
+ DAG.getVTList(MVT::Other),
+ StoreOps, VecTy, MMO);
+ DCI.AddToWorklist(Store.getNode());
+ return Store;
+}
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
+ return N->getOperand(0);
+ break;
+ case PPCISD::SRL:
+ if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue() || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ return DAGCombineExtBoolTrunc(N, DCI);
+ case ISD::TRUNCATE:
+ case ISD::SETCC:
+ case ISD::SELECT_CC:
+ return DAGCombineTruncBoolExt(N, DCI);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return combineFPToIntToFP(N, DCI);
+ case ISD::STORE: {
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops,
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (cast<StoreSDNode>(N)->isUnindexed() &&
+ N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
+ Ops, cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
+
+ // For little endian, VSX stores require generating xxswapd/stxvd2x.
+ EVT VT = N->getOperand(1).getValueType();
+ if (VT.isSimple()) {
+ MVT StoreVT = VT.getSimpleVT();
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
+ StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
+ return expandVSXStoreForLE(N, DCI);
+ }
+ break;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT VT = LD->getValueType(0);
+
+ // For little endian, VSX loads require generating lxvd2x/xxswapd.
+ if (VT.isSimple()) {
+ MVT LoadVT = VT.getSimpleVT();
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
+ LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
+ return expandVSXLoadForLE(N, DCI);
+ }
+
+ EVT MemVT = LD->getMemoryVT();
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
+ unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
+ if (LD->isUnindexed() && VT.isVector() &&
+ ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
+ // P8 and later hardware should just use LOAD.
+ !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+ (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
+ LD->getAlignment() >= ScalarABIAlignment)) &&
+ LD->getAlignment() < ABIAlignment) {
+ // This is a type-legal unaligned Altivec or QPX load.
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+
+ // This implements the loading of unaligned vectors as described in
+ // the venerable Apple Velocity Engine overview. Specifically:
+ // https://developer.apple.com/hardwaredrivers/ve/alignment.html
+ // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
+ //
+ // The general idea is to expand a sequence of one or more unaligned
+ // loads into an alignment-based permutation-control instruction (lvsl
+ // or lvsr), a series of regular vector loads (which always truncate
+ // their input address to an aligned address), and a series of
+ // permutations. The results of these permutations are the requested
+ // loaded values. The trick is that the last "extra" load is not taken
+ // from the address you might suspect (sizeof(vector) bytes after the
+ // last requested load), but rather sizeof(vector) - 1 bytes after the
+ // last requested vector. The point of this is to avoid a page fault if
+ // the base address happened to be aligned. This works because if the
+ // base address is aligned, then adding less than a full vector length
+ // will cause the last vector in the sequence to be (re)loaded.
+ // Otherwise, the next vector will be fetched as you might suspect was
+ // necessary.
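+ // For example, for a 16-byte vector at address 0x1003, the base lvx loads
+ // from 0x1000 and the extra load at 0x1003 + 15 = 0x1012 truncates to
+ // 0x1010; had the address been exactly 0x1000, then 0x1000 + 15 = 0x100f
+ // truncates back to 0x1000, so no byte beyond the requested data is touched.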
+
+ // We might be able to reuse the permutation generation from
+ // a different base address offset from this one by an aligned amount.
+ // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+ // optimization later.
+ Intrinsic::ID Intr, IntrLD, IntrPerm;
+ MVT PermCntlTy, PermTy, LDTy;
+ if (Subtarget.hasAltivec()) {
+ Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl;
+ IntrLD = Intrinsic::ppc_altivec_lvx;
+ IntrPerm = Intrinsic::ppc_altivec_vperm;
+ PermCntlTy = MVT::v16i8;
+ PermTy = MVT::v4i32;
+ LDTy = MVT::v4i32;
+ } else {
+ Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
+ Intrinsic::ppc_qpx_qvlpcls;
+ IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
+ Intrinsic::ppc_qpx_qvlfs;
+ IntrPerm = Intrinsic::ppc_qpx_qvfperm;
+ PermCntlTy = MVT::v4f64;
+ PermTy = MVT::v4f64;
+ LDTy = MemVT.getSimpleVT();
+ }
+
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
+
+ // Create the new MMO for the new base load. It is like the original MMO,
+ // but represents an area in memory almost twice the vector size centered
+ // on the original address. If the address is unaligned, we might start
+ // reading up to (sizeof(vector)-1) bytes below the address of the
+ // original unaligned load.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *BaseMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ -(long)MemVT.getStoreSize()+1,
+ 2*MemVT.getStoreSize()-1);
+
+ // Create the new base load.
+ SDValue LDXIntID =
+ DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
+ SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
+ SDValue BaseLoad =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(PermTy, MVT::Other),
+ BaseLoadOps, LDTy, BaseMMO);
+
+ // Note that the value of IncOffset (which is provided to the next
+ // load's pointer info offset value, and thus used to calculate the
+ // alignment), and the value of IncValue (which is actually used to
+ // increment the pointer value) are different! This is because we
+ // require the next load to appear to be aligned, even though it
+ // is actually offset from the base pointer by a lesser amount.
+ int IncOffset = VT.getSizeInBits() / 8;
+ int IncValue = IncOffset;
+
+ // Walk (both up and down) the chain looking for another load at the real
+ // (aligned) offset (the alignment of the other load does not matter in
+ // this case). If found, then do not use the offset reduction trick, as
+ // that will prevent the loads from being later combined (as they would
+ // otherwise be duplicates).
+ if (!findConsecutiveLoad(LD, DAG))
+ --IncValue;
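+ // For a v4i32 load, for example, IncOffset remains 16 while IncValue drops
+ // to 15 when no consecutive load is found, so an already-aligned base
+ // address makes the extra load simply reload the same 16 bytes.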
+
+ SDValue Increment =
+ DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+
+ MachineMemOperand *ExtraMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ 1, 2*MemVT.getStoreSize()-1);
+ SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
+ SDValue ExtraLoad =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(PermTy, MVT::Other),
+ ExtraLoadOps, LDTy, ExtraMMO);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ BaseLoad.getValue(1), ExtraLoad.getValue(1));
+
+ // Because vperm has a big-endian bias, we must reverse the order
+ // of the input vectors and complement the permute control vector
+ // when generating little endian code. We have already handled the
+ // latter by using lvsr instead of lvsl, so just reverse BaseLoad
+ // and ExtraLoad here.
+ SDValue Perm;
+ if (isLittleEndian)
+ Perm = BuildIntrinsicOp(IntrPerm,
+ ExtraLoad, BaseLoad, PermCntl, DAG, dl);
+ else
+ Perm = BuildIntrinsicOp(IntrPerm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+
+ if (VT != PermTy)
+ Perm = Subtarget.hasAltivec() ?
+ DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
+ DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
+ DAG.getTargetConstant(1, dl, MVT::i64));
+ // second argument is 1 because this rounding
+ // is always exact.
+
+ // The output of the permutation is our loaded result, the TokenFactor is
+ // our new chain.
+ DCI.CombineTo(N, Perm, TF);
+ return SDValue(N, 0);
+ }
+ }
+ break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
+ : Intrinsic::ppc_altivec_lvsl);
+ if ((IID == Intr ||
+ IID == Intrinsic::ppc_qpx_qvlpcld ||
+ IID == Intrinsic::ppc_qpx_qvlpcls) &&
+ N->getOperand(1)->getOpcode() == ISD::ADD) {
+ SDValue Add = N->getOperand(1);
+
+ int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
+ 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+
+ if (DAG.MaskedValueIsZero(
+ Add->getOperand(1),
+ APInt::getAllOnesValue(Bits /* alignment */)
+ .zext(
+ Add.getValueType().getScalarType().getSizeInBits()))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
+ // We've found another LVSL/LVSR, and this address is an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
+
+ return SDValue(*UI, 0);
+ }
+ }
+ }
+
+ if (isa<ConstantSDNode>(Add->getOperand(1))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(UI->getOperand(1)) &&
+ (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
+ cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
+ (1ULL << Bits) == 0) {
+ SDNode *OtherAdd = *UI;
+ for (SDNode::use_iterator VI = OtherAdd->use_begin(),
+ VE = OtherAdd->use_end(); VI != VE; ++VI) {
+ if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
+ return SDValue(*VI, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ // For little endian, VSX loads require generating lxvd2x/xxswapd.
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ return expandVSXLoadForLE(N, DCI);
+ }
+ }
+ break;
+ }
+ case ISD::INTRINSIC_VOID: {
+ // For little endian, VSX stores require generating xxswapd/stxvd2x.
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ return expandVSXStoreForLE(N, DCI);
+ }
+ }
+ break;
+ }
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away, we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6 and
+ // a normal output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = nullptr;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if the flag value (result 1) has no uses,
+ // don't transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = nullptr;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == nullptr; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFOCRF instruction, we know this is safe.
+ // Otherwise we give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFOCRF)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BRCOND: {
+ SDValue Cond = N->getOperand(1);
+ SDValue Target = N->getOperand(2);
+
+ if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+ assert(Cond.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+ N->getOperand(0), Target);
+ }
+ }
+ break;
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+
+ // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
+ // value. If so, look through the AND to get to the intrinsic.
+ if (LHS.getOpcode() == ISD::AND &&
+ LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ !isNullConstant(LHS.getOperand(1)))
+ LHS = LHS.getOperand(0);
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(RHS)) {
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ "Counter decrement comparison is not EQ or NE");
+
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ bool isBDNZ = (CC == ISD::SETEQ && Val) ||
+ (CC == ISD::SETNE && !Val);
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
+ assert(LHS.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, dl, MVT::i32)
+ };
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, dl, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue
+PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ // fold (sdiv X, pow2)
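+ // On PPC this is implemented as srawi followed by addze: the shift sets CA
+ // when a negative dividend loses nonzero bits, and addze adds the carry back
+ // to round the quotient toward zero (the PPCISD::SRA_ADDZE node below).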
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::i64 && !Subtarget.isPPC64())
+ return SDValue();
+ if ((VT != MVT::i32 && VT != MVT::i64) ||
+ !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+
+ bool IsNegPow2 = (-Divisor).isPowerOf2();
+ unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
+ SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
+
+ SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
+ if (Created)
+ Created->push_back(Op.getNode());
+
+ if (IsNegPow2) {
+ Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+ if (Created)
+ Created->push_back(Op.getNode());
+ }
+
+ return Op;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
+
+unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+ switch (Subtarget.getDarwinDirective()) {
+ default: break;
+ case PPC::DIR_970:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8: {
+ if (!ML)
+ break;
+
+ const PPCInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // For small loops (between 5 and 8 instructions), align to a 32-byte
+ // boundary so that the entire loop fits in one instruction-cache line.
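+ // (The value returned here is the log2 of the alignment, so returning 5
+ // requests a 32-byte boundary.)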
+ uint64_t LoopSize = 0;
+ for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
+ for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
+ LoopSize += TII->GetInstSizeInBytes(J);
+ if (LoopSize > 32)
+ break;
+ }
+
+ if (LoopSize > 16 && LoopSize <= 32)
+ return 5;
+
+ break;
+ }
+ }
+
+ return TargetLowering::getPrefLoopAlignment(ML);
+}
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ case 'Z':
+ // FIXME: While Z does indicate a memory constraint, it specifically
+ // indicates an r+r address (used in conjunction with the 'y' modifier
+ // in the replacement string). Currently, we're forcing the base
+ // register to be r0 in the asm printer (which is interpreted as zero)
+ // and forming the complete address in the second register. This is
+ // suboptimal.
+ return C_Memory;
+ }
+ } else if (Constraint == "wc") { // individual CR bits.
+ return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+
+ // Look at the constraint type.
+ if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+ return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'b':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'd':
+ if (type->isDoubleTy())
+ weight = CW_Register;
+ break;
+ case 'v':
+ if (type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'y':
+ weight = CW_Register;
+ break;
+ case 'Z':
+ weight = CW_Memory;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ if (VT == MVT::i64 && Subtarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && Subtarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
+ case 'f':
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
+ break;
+ case 'v':
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
+ if (Subtarget.hasAltivec())
+ return std::make_pair(0U, &PPC::VRRCRegClass);
+ case 'y': // crrc
+ return std::make_pair(0U, &PPC::CRRCRegClass);
+ }
+ } else if (Constraint == "wc" && Subtarget.useCRBits()) {
+ // An individual CR bit.
+ return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if ((Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") && Subtarget.hasVSX()) {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
+ } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+ if (VT == MVT::f32 && Subtarget.hasP8Vector())
+ return std::make_pair(0U, &PPC::VSSRCRegClass);
+ else
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
+ }
+
+ std::pair<unsigned, const TargetRegisterClass *> R =
+ TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+
+ // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+ // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+ // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+ // register.
+ // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+ // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+ if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
+ PPC::GPRCRegClass.contains(R.first))
+ return std::make_pair(TRI->getMatchingSuperReg(R.first,
+ PPC::sub_32, &PPC::G8RCRegClass),
+ &PPC::G8RCRegClass);
+
+ // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
+ if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
+ R.first = PPC::CR0;
+ R.second = &PPC::CRRCRegClass;
+ }
+
+ return R;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+
+ // Only support length 1 constraints.
+ if (Constraint.length() > 1) return;
+
+ char Letter = Constraint[0];
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ SDLoc dl(Op);
+ int64_t Value = CST->getSExtValue();
+ EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
+ // numbers are printed as such.
+ switch (Letter) {
+ default: llvm_unreachable("Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if (isInt<16>(Value))
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ if (isShiftedUInt<16, 16>(Value))
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if (isShiftedInt<16, 16>(Value))
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if (isUInt<16>(Value))
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if (Value > 0 && isPowerOf2_64(Value))
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if (isInt<16>(-Value))
+ Result = DAG.getTargetConstant(Value, dl, TCVT);
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+ // PPC does not allow r+i addressing modes for vectors!
+ if (Ty->isVectorTy() && AM.BaseOffs != 0)
+ return false;
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only supports r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ // Make sure the function does not optimize away the store of the RA to
+ // the stack.
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setLRStoreRequired();
+ bool isPPC64 = Subtarget.isPPC64();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
+ MachinePointerInfo(), false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until prologue/epilogue
+ // insertion (PEI).
+ unsigned FrameReg;
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
+ PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo(), false, false,
+ false, 0);
+ return FrameAddr;
+}
+
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+
+ if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
+ (!isPPC64 && VT != MVT::i32))
+ report_fatal_error("Invalid register global variable type");
+
+ bool is64Bit = isPPC64 && VT == MVT::i64;
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
+ .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
+ .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
+ (is64Bit ? PPC::X13 : PPC::R13))
+ .Default(0);
+
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
+
+bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_altivec_lvebx:
+ case Intrinsic::ppc_altivec_lvehx:
+ case Intrinsic::ppc_altivec_lvewx:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_qpx_qvlfda:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ case Intrinsic::ppc_qpx_qvlfiwaa:
+ case Intrinsic::ppc_qpx_qvlfiwza: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfiw:
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_altivec_stvebx:
+ case Intrinsic::ppc_altivec_stvehx:
+ case Intrinsic::ppc_altivec_stvewx:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::ppc_qpx_qvstfda:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ case Intrinsic::ppc_qpx_qvstfiwa: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero, the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+/// against an alignment requirement, probably because the source does not
+/// need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+ }
+
+ if (Subtarget.isPPC64()) {
+ return MVT::i64;
+ }
+
+ return MVT::i32;
+}
+
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ return !(BitSize == 0 || BitSize > 64);
+}
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ // Generally speaking, zexts are not free, but they are free when they can be
+ // folded with other operations.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
+ EVT MemVT = LD->getMemoryVT();
+ if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
+ (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
+ (LD->getExtensionType() == ISD::NON_EXTLOAD ||
+ LD->getExtensionType() == ISD::ZEXTLOAD))
+ return true;
+ }
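+ // For example, an extending lbz/lhz/lwz already zero-fills the rest of the
+ // destination GPR, so the zero-extension is folded into the load for free.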
+
+ // FIXME: Add other cases...
+ // - 32-bit shifts with a zext to i64
+ // - zext after ctlz, bswap, etc.
+ // - zext after and by a constant mask
+
+ return TargetLowering::isZExtFree(Val, VT2);
+}
+
+bool PPCTargetLowering::isFPExtFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return true;
+}
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally only traps for software emulation when crossing page
+ // boundaries.
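+ // For example, a misaligned i32 or i64 access is allowed (and reported as
+ // fast), while a misaligned v4i32 access is only allowed when VSX is
+ // available, since lxvw4x/stxvw4x tolerate arbitrary alignment.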
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector()) {
+ if (Subtarget.hasVSX()) {
+ if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
+ VT != MVT::v4f32 && VT != MVT::v4i32)
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+const MCPhysReg *
+PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
+ // LR is a callee-save register, but we must treat it as clobbered by any call
+ // site. Hence we include LR in the scratch registers, which are in turn added
+ // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
+ // to CTR, which is used by any indirect call.
+ static const MCPhysReg ScratchRegs[] = {
+ PPC::X12, PPC::LR8, PPC::CTR8, 0
+ };
+
+ return ScratchRegs;
+}
+
+unsigned PPCTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
+}
+
+unsigned PPCTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
+}
+
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+ EVT VT , unsigned DefinedValues) const {
+ if (VT == MVT::v2i64)
+ return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
+
+ if (Subtarget.hasQPX()) {
+ if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
+ return true;
+ }
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
+Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
+ if (DisableILPPref || Subtarget.enableMachineScheduler())
+ return TargetLowering::getSchedulingPreference(N);
+
+ return Sched::ILP;
+}
+
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const {
+ return PPC::createFastISel(FuncInfo, LibInfo);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 0000000..44bcb89
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,902 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
+
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType : unsigned {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// and f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU, FCFIDS, FCFIDUS,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers.
+ FCTIDUZ, FCTIWUZ,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE, FRSQRTE,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// The CMPB instruction (takes two operands of i32 or i64).
+ CMPB,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ /// The following two target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an offset from native SP to the address of the most recent
+ /// dynamic alloca.
+ DYNAREAOFFSET,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// The combination of sra[wd]i and addze used to implement signed
+ /// integer division by a power of 2. The first operand is the dividend,
+ /// and the second is the constant shift amount (representing the
+ /// divisor).
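+ /// For example, a signed divide by 4 is emitted (roughly) as
+ ///   srawi rT, rA, 2
+ ///   addze rT, rT
+ /// where addze adds the carry that srawi sets when a negative dividend had
+ /// nonzero bits shifted out, rounding the quotient toward zero.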
+ SRA_ADDZE,
+
+ /// CALL - A direct function call.
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
+ /// SVR4 calls.
+ CALL, CALL_NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
+ /// instruction and the TOC reload required on SVR4 PPC64.
+ BCTRL_LOAD_TOC,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFOCRF,
+
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
+ // FIXME: Remove these once the ANDI glue bug is fixed:
+ /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
+ /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
+ /// implement truncation of i32 or i64 to i1.
+ ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
+
+ // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
+ // target (returns (Lo, Hi)). It takes a chain operand.
+ READ_TIME_BASE,
+
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of a better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+ /// loops.
+ BDNZ, BDZ,
+
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
+ FADDRTZ,
+
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
+ /// on PPC32.
+ PPC32_GOT,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS on PPC32.
+ PPC32_PICGOT,
+
+ /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym\@got\@tprel\@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym\@got\@tprel\@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym\@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
+ /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsgd\@ha.
+ ADDIS_TLSGD_HA,
+
+ /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ ADDI_TLSGD_L,
+
+ /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
+ /// ADDIS_TLSGD_L_ADDR until after register assignment.
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+ /// register assignment.
+ ADDI_TLSGD_L_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsld\@ha.
+ ADDIS_TLSLD_HA,
+
+ /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ ADDI_TLSLD_L,
+
+ /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
+ /// ADDIS_TLSLD_L_ADDR until after register assignment.
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+ /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+ /// following register assignment.
+ ADDI_TLSLD_L_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds X3 to
+ /// sym\@dtprel\@ha.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@dtprel\@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+
+ /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
+ /// operand identifies the operating system entry point.
+ SC,
+
+ /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
+ CLRBHRB,
+
+ /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
+ /// history rolling buffer entry.
+ MFBHRBE,
+
+ /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
+ RFEBB,
+
+ /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
+ /// endian. Maps to an xxswapd instruction that corrects an lxvd2x
+ /// or stxvd2x instruction. The chain is necessary because the
+ /// sequence replaces a load and needs to provide the same number
+ /// of outputs.
+ XXSWAPD,
+
+ /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+ QVFPERM,
+
+ /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+ QVGPCI,
+
+ /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+ QVALIGNI,
+
+ /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+ QVESPLATI,
+
+ /// QBFLT = Access the underlying QPX floating-point boolean
+ /// representation.
+ QBFLT,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
+ /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
+ /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to an lxvd2x instruction that will be followed by
+ /// an xxswapd.
+ LXVD2X,
+
+ /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to an stxvd2x instruction that will be preceded by
+ /// an xxswapd.
+ STXVD2X,
+
+ /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+ /// The 4xf32 load used for v4i1 constants.
+ QVLFSb,
+
+ /// GPRC = TOC_ENTRY GA, TOC
+ /// Loads the entry for GA from the TOC, where the TOC base is given by
+ /// the last operand.
+ TOC_ENTRY
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
+ /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUDUM instruction.
+ bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG);
+
+ /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGEW or VMRGOW instruction
+ bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+ unsigned ShuffleKind, SelectionDAG &DAG);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
+ /// shift amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+ /// If this is a qvaligni shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isQVALIGNIShuffleMask(SDNode *N);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ const PPCSubtarget &Subtarget;
+
+ public:
+ explicit PPCTargetLowering(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ bool useSoftFloat() const override;
+
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
+
+ bool isCheapToSpeculateCttz() const override {
+ return true;
+ }
+
+ bool isCheapToSpeculateCtlz() const override {
+ return true;
+ }
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
+
+ /// Return true if the target always benefits from combining into FMA for a
+ /// given value type. This must typically return false on targets where FMA
+ /// takes more cycles to execute than FADD.
+ bool enableAggressiveFMAFusion(EVT VT) const override;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
+ /// SelectAddressRegReg - Given the specified address, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg. If Aligned is true, only accept
+ /// displacements suitable for STD and friends, i.e. multiples of 4.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG, bool Aligned) const;
+
+ /// SelectAddressRegRegOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const override;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const override;
+
+ SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const override;
+
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
+
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const override;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned AtomicSize,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const override;
+
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const override;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
+
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ if (ConstraintCode == "es")
+ return InlineAsm::Constraint_es;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "Z")
+ return InlineAsm::Constraint_Z;
+ else if (ConstraintCode == "Zy")
+ return InlineAsm::Constraint_Zy;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalAddImmediate(int64_t Imm) const override;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g., on PPC it's free to truncate an i64 value in
+ /// register X1 to i32 by referencing its sub-register R1.
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
+
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
+
+ bool isFPExtFree(EVT VT) const override;
+
+ /// \brief Returns true if it is beneficial to convert a load of a constant
+ /// to just the constant itself.
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
+
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const override;
+
+ /// getOptimalMemOpType - Returns the target-specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero, the destination can satisfy any alignment
+ /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+ /// against an alignment requirement, probably because the source does not
+ /// need to be loaded. If 'IsMemset' is true, this call is expanding a
+ /// memset. If 'ZeroMemset' is true, it is a memset of zero. 'MemcpyStrSrc'
+ /// indicates whether the memcpy source is constant so it does not need to
+ /// be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
+
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation.
+ bool allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override;
+
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+
+ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+
+ // Should we expand the build vector with shuffles?
+ bool
+ shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+
+ /// createFastISel - This method returns a target-specific FastISel object,
+ /// or null if the target does not support "fast" instruction selection.
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const override;
+
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
+ // We support any array type as "consecutive" block in the parameter
+ // save area. The element type defines the alignment requirement and
+ // whether the argument should go in GPRs, FPRs, or VRs if available.
+ //
+ // Note that clang uses this capability both to implement the ELFv2
+ // homogeneous float/vector aggregate ABI, and to avoid having to use
+ // "byval" when passing aggregates that might fully fit in registers.
+ return Ty->isArrayTy();
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
+ private:
+ struct ReuseLoadInfo {
+ SDValue Ptr;
+ SDValue Chain;
+ SDValue ResChain;
+ MachinePointerInfo MPI;
+ bool IsInvariant;
+ unsigned Alignment;
+ AAMDNodes AAInfo;
+ const MDNode *Ranges;
+
+ ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
+ };
+
+ bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
+ void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
+ SelectionDAG &DAG) const;
+
+ void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
+ SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
+
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ SDLoc dl) const;
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
+ bool isVarArg, bool IsPatchPoint, bool hasNest,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
+
+ SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const override;
+
+ SDValue
+ extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
+ SDValue ArgVal, SDLoc dl) const;
+
+ SDValue
+ LowerFormalArguments_Darwin(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_64SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_32SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG, SDLoc dl) const;
+
+ SDValue
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall, bool IsPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
+ SDValue
+ LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall, bool IsPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
+ SDValue
+ LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall, bool IsPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
+ unsigned combineRepeatedFPDivisors() const override;
+
+ CCAssignFn *useFastISelCCs(unsigned Flag) const;
+ };
+
+ namespace PPC {
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo);
+ }
+
+ bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+}
+
+#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 0000000..075e093
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,1235 @@
+//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
+}
+def s17imm64 : Operand<i64> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i64imm:$imm);
+}
+def tlsreg : Operand<i64> {
+ let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
+}
+def tlsgd : Operand<i64> {}
+def tlscall : Operand<i64> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue(), SDLoc(N));
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue(), SDLoc(N))
+ : getI32Imm(0, SDLoc(N));
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: extract bits 32-47 of the immediate.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32), SDLoc(N));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: extract bits 48-63 of the immediate.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48), SDLoc(N));
+}]>;
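+// For example, an immediate of 0x0123456789ABCDEF yields 0x4567 from HI32_48
+// and 0x0123 from HI48_64; these 16-bit chunks are typically used when
+// materializing arbitrary 64-bit constants via lis/ori/rldicr/oris/ori
+// sequences.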
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR8, RM] in
+ def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
+ [(retflag)]>, Requires<[In64BitMode]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+ def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+ def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ }
+}
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
+ }
+
+ let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
+ def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", IIC_BrB, []>;
+ def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", IIC_BrB, []>;
+ }
+}
+
+
+
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+
+ def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
+
+ def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>;
+ }
+ let Uses = [RM], isCodeGenOnly = 1 in {
+ def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+
+ def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins tlscall:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+
+ def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins abscalltarget:$func),
+ "bla $func\n\tnop", IIC_BrB,
+ [(PPCcall_nop (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+ def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ }
+ }
+}
+
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
+ def BCTRL8_LDinto_toc :
+ XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
+ (ins memrix:$src),
+ "bctrl\n\tld 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc ixaddr:$src)]>,
+ Requires<[In64BitMode]>;
+}
+
+} // Interpretation64Bit
+
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let Interpretation64Bit = 1, isAsmParserOnly = 1 in
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in
+def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
+
+// Calls
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+ (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+ (BL8_NOP tglobaladdr:$dst)>;
+
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+ (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+ (BL8_NOP texternalsym:$dst)>;
+
+// Atomic operations
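+// These pseudos are expanded after instruction selection (via the custom
+// inserter, EmitAtomicBinary) into a load-reserve/store-conditional retry
+// loop. For example, ATOMIC_LOAD_ADD_I64 becomes, roughly:
+//   loop:  ldarx  rD, 0, rPtr
+//          add    rT, rIncr, rD
+//          stdcx. rT, 0, rPtr
+//          bne-   loop          # retry if the reservation was lost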
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_I64",
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
+def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", IIC_LdStLDARX, []>;
+
+// Instruction to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
+def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", IIC_BrB,
+ []>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
+ "ba $dst", IIC_BrB,
+ []>;
+} // Interpretation64Bit
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+
+
+// 64-bit CR instructions
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+let hasSideEffects = 0 in {
+def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
+ "mfcr $rT", IIC_SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasSideEffects = 0
+
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ let Defs = [CTR8] in
+ def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins),
+ "mfctr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let hasSideEffects = 1, Defs = [CTR8] in {
+let Pattern = [(int_ppc_mtctr i64:$rS)] in
+def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Pattern = [(set i64:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
+ "mfspr $rT, 268", IIC_SprMFTB>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
+ [(set i64:$result,
+ (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
+def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
+ [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
+ "mtlr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
+ "mflr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+} // Interpretation64Bit
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Interpretation64Bit = 1 in {
+let hasSideEffects = 0 in {
+let isCodeGenOnly = 1 in {
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm),
+ "li $rD, $imm", IIC_IntSimple,
+ [(set i64:$rD, imm64SExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm),
+ "lis $rD, $imm", IIC_IntSimple,
+ [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
+}
+
+// Logical ops.
+let isCommutable = 1 in {
+defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
+defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+} // isCommutable
+defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
+defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
+defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+} // isCommutable
+defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
+defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
+defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+} // let isCommutable = 1
+
+// Logical ops with immediate.
+let Defs = [CR0] in {
+def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
+ isDOT;
+}
+def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "ori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "oris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
+
+let isCommutable = 1 in
+defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple,
+ [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
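+// A rough sketch of the initial-exec sequence this feeds into (register
+// numbers and the symbol are illustrative):
+//   addis r9, r2, x@got@tprel@ha    ; ADDISgotTprelHA (defined below)
+//   ld    r9, x@got@tprel@l(r9)     ; LDgotTprelL (defined below)
+//   add   r9, r9, x@tls             ; ADD8TLS: the @tls operand marks the add
+//                                   ; and stands for the thread pointer (r13)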
+
+let isCommutable = 1 in
+defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
+ PPC970_DGroup_Cracked;
+
+let Defs = [CARRY] in
+def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>;
+def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IIC_IntSimple,
+ [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
+ "addis $rD, $rA, $imm", IIC_IntSimple,
+ [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
+
+let Defs = [CARRY] in {
+def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>;
+defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "neg", "$rT, $rA", IIC_IntSimple,
+ [(set i64:$rT, (ineg i64:$rA))]>;
+let Uses = [CARRY] in {
+let isCommutable = 1 in
+defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
+defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "addme", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
+defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "addze", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
+defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
+defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "subfme", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
+defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "subfze", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
+}
+} // isCodeGenOnly
+
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let isAsmParserOnly = 1 in
+def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple, []>;
+
+let isCommutable = 1 in {
+defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulhd", "$rT, $rA, $rB", IIC_IntMulHW,
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
+defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU,
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+} // isCommutable
+}
+} // Interpretation64Bit
+
+let isCompare = 1, hasSideEffects = 0 in {
+ def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+ "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
+ def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+ "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
+ def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm),
+ "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64;
+ def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "cmpldi $dst, $src1, $src2",
+ IIC_IntCompare>, isPPC64;
+}
+
+let hasSideEffects = 0 in {
+defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "sld", "$rA, $rS, $rB", IIC_IntRotateD,
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "srd", "$rA, $rS, $rB", IIC_IntRotateD,
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "srad", "$rA, $rS, $rB", IIC_IntRotateD,
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS),
+ "cntlzw", "$rA, $rS", IIC_IntGeneral, []>;
+
+defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsb", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
+defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsh", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
+
+defm SLW8 : XForm_6r<31, 24, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "slw", "$rA, $rS, $rB", IIC_IntGeneral, []>;
+defm SRW8 : XForm_6r<31, 536, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "srw", "$rA, $rS, $rB", IIC_IntGeneral, []>;
+} // Interpretation64Bit
+
+// For fast-isel:
+let isCodeGenOnly = 1 in {
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64;
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64;
+} // isCodeGenOnly for fast-isel
+
+defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsw", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsw", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+
+defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+ "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
+ "cntlzd", "$rA, $rS", IIC_IntGeneral,
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntd $rA, $rS", IIC_IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
+def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "bpermd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>,
+ isPPC64, Requires<[HasBPERMD]>;
+
+let isCodeGenOnly = 1, isCommutable = 1 in
+def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cmpb $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (PPCcmpb i64:$rS, i64:$rB))]>;
+
+// popcntw also does a population count on the high 32 bits (storing the
+// results in the high 32-bits of the output). We'll ignore that here (which is
+// safe because we never separately use the high part of the 64-bit registers).
+def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
+ "popcntw $rA, $rS", IIC_IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
+
+defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64;
+defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
+def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
+def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
+let isCommutable = 1 in
+defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
+ [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
+}
+
+let hasSideEffects = 0 in {
+defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
+ (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+
+// Rotate instructions.
+defm RLDCL : MDSForm_1r<30, 8,
+ (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
+ "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
+ []>, isPPC64;
+defm RLDCR : MDSForm_1r<30, 9,
+ (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
+ "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
+ []>, isPPC64;
+defm RLDICL : MDForm_1r<30, 0,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def RLDICL_32_64 : MDForm_1<30, 0,
+ (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+// End fast-isel.
+defm RLDICR : MDForm_1r<30, 1,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+defm RLDIC : MDForm_1r<30, 2,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
+ (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ []>;
+
+defm RLWNM8 : MForm_2r<23, (outs g8rc:$rA),
+ (ins g8rc:$rS, g8rc:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
+ []>;
+
+// RLWIMI can be commuted if the rotate amount is zero.
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
+ (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+
+let isSelect = 1 in
+def ISEL8 : AForm_4<31, 15,
+ (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
+ "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
+ []>;
+} // Interpretation64Bit
+} // hasSideEffects = 0
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let PPC970_Unit = 2 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
+ "lha $rD, $src", IIC_LdStLHA,
+ [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
+ "lwa $rD, $src", IIC_LdStLWA,
+ [(set i64:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src),
+ "lhax $rD, $src", IIC_LdStLHA,
+ [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
+ "lwax $rD, $src", IIC_LdStLHA,
+ [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+// For fast-isel:
+let isCodeGenOnly = 1, mayLoad = 1 in {
+def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
+ "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
+ "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+} // end fast-isel isCodeGenOnly
+
+// Update forms.
+let mayLoad = 1, hasSideEffects = 0 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", IIC_LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+// There is no LWAU instruction (lwa has no update form).
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwaux $rD, $addr", IIC_LdStLHAUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
+}
+}
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+// Zero extending loads.
+let PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
+ "lbz $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
+ "lhz $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
+ "lwz $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+} // Interpretation64Bit
+
+
+// Full 8-byte loads.
+let PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
+ "ld $rD, $src", IIC_LdStLD,
+ [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+// The following four definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
+def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtoc",
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtocJTI",
+ [(set i64:$rD,
+ (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
+def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtocCPT",
+ [(set i64:$rD,
+ (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtocCPT",
+ [(set i64:$rD,
+ (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
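+// In the small code model each pseudo above becomes a single TOC-relative
+// load; a minimal sketch (r2 is the TOC pointer, the symbol is illustrative):
+//   ld r3, sym@toc(r2)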
+
+def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
+ "ldx $rD, $src", IIC_LdStLD,
+ [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src),
+ "ldbrx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
+let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in {
+def LHBRX8 : XForm_1<31, 790, (outs g8rc:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", IIC_LdStLoad, []>;
+def LWBRX8 : XForm_1<31, 534, (outs g8rc:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", IIC_LdStLoad, []>;
+}
+
+let mayLoad = 1, hasSideEffects = 0 in {
+def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", IIC_LdStLDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "ldux $rD, $addr", IIC_LdStLDUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
+}
+}
+
+// Support for medium and large code model.
+let hasSideEffects = 0 in {
+def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDIStocHA", []>, isPPC64;
+let mayLoad = 1 in
+def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+ "#LDtocL", []>, isPPC64;
+def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDItocL", []>, isPPC64;
+}
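+// A minimal sketch of the medium-code-model expansion these support (register
+// numbers and the symbol are illustrative):
+//   addis r3, r2, sym@toc@ha        ; ADDIStocHA
+//   ld    r3, sym@toc@l(r3)         ; LDtocL: address loaded from a TOC slot
+// or, when the object can be addressed relative to the TOC directly:
+//   addi  r3, r3, sym@toc@l         ; ADDItocL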
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDISgotTprelHA",
+ [(set i64:$rD,
+ (PPCaddisGotTprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+ "#LDgotTprelL",
+ [(set i64:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
+ isPPC64;
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+ (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDIStlsgdHA",
+ [(set i64:$rD,
+ (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDItlsgdL",
+ [(set i64:$rD,
+ (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
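+// A rough sketch of the general-dynamic call sequence built from these
+// pseudos (registers and the symbol are illustrative; the call clobbers the
+// volatile registers listed in Defs):
+//   addis r3, r2, x@got@tlsgd@ha    ; ADDIStlsgdHA
+//   addi  r3, r3, x@got@tlsgd@l     ; ADDItlsgdL
+//   bl    __tls_get_addr(x@tlsgd)   ; GETtlsADDR
+//   nop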
+// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsgdLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsgdLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDIStlsldHA",
+ [(set i64:$rD,
+ (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDItlsldL",
+ [(set i64:$rD,
+ (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+// Combined op for ADDItlsldL and GETtlsldADDR, late expanded. X3 and LR8
+// are true defines, while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+ in
+def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
+ (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+ "#ADDItlsldLADDR",
+ [(set i64:$rD,
+ (PPCaddiTlsldLAddr i64:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDISdtprelHA",
+ [(set i64:$rD,
+ (PPCaddisDtprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDIdtprelL",
+ [(set i64:$rD,
+ (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+
+let PPC970_Unit = 2 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
+ "stb $rS, $src", IIC_LdStStore,
+ [(truncstorei8 i64:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
+ "sth $rS, $src", IIC_LdStStore,
+ [(truncstorei16 i64:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
+ "stw $rS, $src", IIC_LdStStore,
+ [(truncstorei32 i64:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stbx $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
+ "sthx $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stwx $rS, $dst", IIC_LdStStore,
+ [(truncstorei32 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+} // Interpretation64Bit
+
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
+ "std $rS, $dst", IIC_LdStSTD,
+ [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stdx $rS, $dst", IIC_LdStSTD,
+ [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stdbrx $rS, $dst", IIC_LdStStore,
+ [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+} // Interpretation64Bit
+
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
+ "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "stdux $rS, $dst", IIC_LdStSTDUX, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STDUX $rS, $ptrreg, $ptroff)>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, hasSideEffects = 0,
+ Uses = [RM] in { // FPU Operations.
+defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fcfid", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctid", "$frD, $frB", IIC_FPGeneral,
+ []>, isPPC64;
+defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctidz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fcfidu", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
+ "fcfids", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
+ "fcfidus", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiduz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwuz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
+def : Pat<(i64 (zext i32:$in)),
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
+ 0, 32)>;
+def : Pat<(i64 (anyext i32:$in)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+ (EXTRACT_SUBREG $in, sub_32)>;
+
+// Implement the 'not' operation with the NOR instruction.
+// (we could use the default xori pattern, but nor has lower latency on some
+// cores (such as the A2)).
+def i64not : OutPatFrag<(ops node:$in),
+ (NOR8 $in, $in)>;
+def : Pat<(not i64:$in),
+ (i64not $in)>;
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra i64:$rS, i32:$rB),
+ (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+ (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+ (SLD $rS, $rB)>;
+
+// SHL/SRL
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+ (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
+
+// ROTL
+def : Pat<(rotl i64:$in, i32:$sh),
+ (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, imm:$imm, 0)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+ (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+ (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+ (STDX $rS, xoaddr:$dst)>;
+
+// 64-bit atomic loads and stores
+def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
+def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
+
+def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 0000000..cb0271f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,1215 @@
+//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// *********************************** NOTE ***********************************
+// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing **
+// ** which VMX and VSX instructions are lane-sensitive and which are not. **
+// ** A lane-sensitive instruction relies, implicitly or explicitly, on **
+// ** whether lanes are numbered from left to right. An instruction like **
+// ** VADDFP is not lane-sensitive, because each lane of the result vector **
+// ** relies only on the corresponding lane of the source vectors. However, **
+// ** an instruction like VMULESB is lane-sensitive, because "even" and **
+// ** "odd" lanes are different for big-endian and little-endian numbering. **
+// ** **
+// ** When adding new VMX and VSX instructions, please consider whether they **
+// ** are lane-sensitive. If so, they must be added to a switch statement **
+// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
+// ****************************************************************************
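+// A small worked illustration of the distinction (element numbers only):
+// vaddubm writes lane i of the result from lane i of each source, so the
+// numbering direction is irrelevant; for vmulesb, element 0 is the leftmost
+// byte under big-endian numbering but the rightmost under little-endian
+// numbering, so the "even" inputs it selects differ, making it lane-sensitive.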
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be
+// of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
+}]>;
+def vpkudum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
+}]>;
+def vpkudum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
+}]>;
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+def vpkudum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+
+
+def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG);
+}]>;
+def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG);
+}]>;
+def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG);
+}]>;
+def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG);
+}]>;
+def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG);
+}]>;
+def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG);
+}]>;
+
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG), SDLoc(N));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+/// VSLDOI_swapped* - These fragments are provided for little-endian, where
+/// the inputs must be swapped for correct semantics.
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG), SDLoc(N));
+}]>;
+def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
+}], VSLDOI_swapped_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
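+// Typical use of this helper, taken from later in this file:
+//   def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+// which yields a VAForm_1a instruction whose three v8i16 sources and v8i16
+// result map directly onto the named intrinsic.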
+
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
+ [(set OutTy:$vD,
+ (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
+
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
+ [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vB))]>;
+
+class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
+ !strconcat(opc, " $vD, $vA"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA))]>;
+
+class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
+ !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
+def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
+ "dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>,
+ Deprecated<DeprecatedDST> {
+ let A = 0;
+ let B = 0;
+}
+
+def DSSALL : DSS_Form<1, 822, (outs), (ins),
+ "dssall", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dssall)]>,
+ Deprecated<DeprecatedDST> {
+ let STRM = 0;
+ let A = 0;
+ let B = 0;
+}
+
+def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+let isCodeGenOnly = 1 in {
+ // The very same instructions as above, but formally matching 64-bit registers.
+ def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+}
+
+def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
+ "mfvscr $vD", IIC_LdStStore,
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
+ "mtvscr $vB", IIC_LdStLoad,
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+
+let PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", IIC_LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", IIC_LdStLoad,
+ [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", IIC_LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src),
+ "lvx $vD, $src", IIC_LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", IIC_LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", IIC_LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", IIC_LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvebx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvehx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvewx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvxl $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+let isCommutable = 1 in {
+def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP,
+ [(set v4f32:$vD,
+ (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+
+// FIXME: The fma+fneg pattern won't match because fneg is not legal.
+def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
+ [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+ (fneg v4f32:$vB))))]>;
+
+def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
+} // isCommutable
+
+def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+ v4i32, v4i32, v16i8>;
+def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+let isCommutable = 1 in {
+def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddfp $vD, $vA, $vB", IIC_VecFP,
+ [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddubm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vadduhm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vadduwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
+
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
+} // isCommutable
+
+let isCommutable = 1 in
+def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vand $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
+def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vandc $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (and v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vcfsx $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vcfux $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vctsxs $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vctuxs $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
+
+// Defines with the UIM field set to 0 for floating-point
+// to integer (fp_to_sint/fp_to_uint) conversions and integer
+// to floating-point (sint_to_fp/uint_to_fp) conversions.
+let isCodeGenOnly = 1, VA = 0 in {
+def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vcfsx $vD, $vB, 0", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
+def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vctuxs $vD, $vB, 0", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
+def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vcfux $vD, $vB, 0", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
+def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vctsxs $vD, $vB, 0", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
+}
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+let isCommutable = 1 in {
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
+} // isCommutable
+
+def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrghb $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrghh $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrghw $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrglb $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrglh $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrglw $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+ v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+ v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+ v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+ v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+
+let isCommutable = 1 in {
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+ v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+ v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+ v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+ v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+ v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+ v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
+ v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+ v4i32, v8i16>;
+} // isCommutable
+
+def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
+
+def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubfp $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsububm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubuhm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
+
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
+
+def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vnor $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
+ v4i32:$vB)))]>;
+let isCommutable = 1 in {
+def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vor $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
+def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vxor $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+} // isCommutable
+
+def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
+
+def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >;
+def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
+
+def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
+
+def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vspltb $vD, $vB, $UIMM", IIC_VecPerm,
+ [(set v16i8:$vD,
+ (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vsplth $vD, $vB, $UIMM", IIC_VecPerm,
+ [(set v16i8:$vD,
+ (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
+ [(set v16i8:$vD,
+ (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+
+def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
+def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
+
+
+def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", IIC_VecPerm,
+ [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", IIC_VecPerm,
+ [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", IIC_VecPerm,
+ [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+ v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
+def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpkuhum $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
+def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpkuwum $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+ v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+ v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+ v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+ v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+ v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+ v4i32, v8i16>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
+ [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
+ [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
+
+// f32 element comparisons.
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
+let isCodeGenOnly = 1 in {
+def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", IIC_VecFP,
+ [(set v16i8:$vD, (v16i8 immAllZerosV))]>;
+def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", IIC_VecFP,
+ [(set v8i16:$vD, (v8i16 immAllZerosV))]>;
+def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", IIC_VecFP,
+ [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+
+let IMM=-1 in {
+def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", IIC_VecFP,
+ [(set v16i8:$vD, (v16i8 immAllOnesV))]>;
+def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", IIC_VecFP,
+ [(set v8i16:$vD, (v8i16 immAllOnesV))]>;
+def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", IIC_VecFP,
+ [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
+}
+}
+} // VALU Operations.
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+ (STVX $rS, xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v1i128 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v1i128 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v1i128 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v1i128 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v1i128 VRRC:$src))), (v2i64 VRRC:$src)>;
+
+def : Pat<(v1i128 (bitconvert (v16i8 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v8i16 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
+def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+ (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+ (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+ (VPKUHUM $vA, $vA)>;
+
+// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
+// These fragments are matched for little-endian, where the inputs must
+// be swapped for correct semantics.
+def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB),
+ (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>;
+def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUWUM $vB, $vA)>;
+def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUHUM $vB, $vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+ (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+ (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+ (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+ (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+ (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+ (VMRGHW $vA, $vA)>;
+
+// Match vmrg*(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be
+// swapped for correct semantics.
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLB $vB, $vA)>;
+def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLH $vB, $vA)>;
+def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLW $vB, $vA)>;
+def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHB $vB, $vA)>;
+def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHH $vB, $vA)>;
+def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHW $vB, $vA)>;
+
+// Logical Operations
+def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
+
+def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)),
+ (VNOR $A, $B)>;
+def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
+ (VANDC $A, $B)>;
+
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+ (VMADDFP $vA, $vB,
+ (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
+
+// Fused multiply add and multiply sub for packed float. These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Raphson (used by divide, sqrt).
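+// (For reference only: one Newton-Raphson refinement of a reciprocal estimate
+// x of 1/a is x' = x + x*(1 - a*x); evaluating 1 - a*x without an intermediate
+// rounding step is where the fused multiply-add's extra precision matters.)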
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
+ (VPERM $vA, $vB, $vC)>;
+
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
+
+// Vector shifts
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
+
+// Float to integer and integer to float conversions
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+ (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+ (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+ (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+ (VCFUX_0 $vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+ (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+ (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+ (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+ (VRFIN $vA)>;
+
+} // end HasAltivec
+
+def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
+let Predicates = [HasP8Altivec] in {
+
+let isCommutable = 1 in {
+def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
+ v2i64, v4i32>;
+def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
+ v2i64, v4i32>;
+def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
+ v2i64, v4i32>;
+def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
+ v2i64, v4i32>;
+def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuluwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>;
+def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
+def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
+def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
+def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
+} // isCommutable
+
+// Vector merge
+def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrgew $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrgow $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+// Match vmrgew(x,x) and vmrgow(x,x)
+def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
+ (VMRGEW $vA, $vA)>;
+def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef),
+ (VMRGOW $vA, $vA)>;
+
+// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be swapped for correct
+// semantics.
+def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGEW $vB, $vA)>;
+def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGOW $vB, $vA)>;
+
+
+// Vector shifts
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Integer Arithmetic Instructions
+let isCommutable = 1 in {
+def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
+def VADDUQM : VXForm_1<256, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vadduqm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (add v1i128:$vA, v1i128:$vB))]>;
+} // isCommutable
+
+// Vector Quadword Add
+def VADDEUQM : VA1a_Int_Ty<60, "vaddeuqm", int_ppc_altivec_vaddeuqm, v1i128>;
+def VADDCUQ : VX1_Int_Ty<320, "vaddcuq", int_ppc_altivec_vaddcuq, v1i128>;
+def VADDECUQ : VA1a_Int_Ty<61, "vaddecuq", int_ppc_altivec_vaddecuq, v1i128>;
+
+// Vector Doubleword Subtract
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Quadword Subtract
+def VSUBUQM : VXForm_1<1280, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubuqm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (sub v1i128:$vA, v1i128:$vB))]>;
+def VSUBEUQM : VA1a_Int_Ty<62, "vsubeuqm", int_ppc_altivec_vsubeuqm, v1i128>;
+def VSUBCUQ : VX1_Int_Ty<1344, "vsubcuq", int_ppc_altivec_vsubcuq, v1i128>;
+def VSUBECUQ : VA1a_Int_Ty<63, "vsubecuq", int_ppc_altivec_vsubecuq, v1i128>;
+
+// Count Leading Zeros
+def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzb $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (ctlz v16i8:$vB))]>;
+def VCLZH : VXForm_2<1858, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzh $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (ctlz v8i16:$vB))]>;
+def VCLZW : VXForm_2<1922, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzw $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (ctlz v4i32:$vB))]>;
+def VCLZD : VXForm_2<1986, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vclzd $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (ctlz v2i64:$vB))]>;
+
+// Population Count
+def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntb $vD, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (ctpop v16i8:$vB))]>;
+def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcnth $vD, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (ctpop v8i16:$vB))]>;
+def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntw $vD, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (ctpop v4i32:$vB))]>;
+def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vpopcntd $vD, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (ctpop v2i64:$vB))]>;
+
+let isCommutable = 1 in {
+// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
+// VSX equivalents. We need to fix this up at some point. Two possible
+// solutions for this problem:
+// 1. Disable Altivec patterns that compete with VSX patterns using the
+// !HasVSX predicate. This essentially favours VSX over Altivec, in
+// hopes of reducing register pressure (larger register set using VSX
+// instructions than VMX instructions)
+// 2. Employ a more disciplined use of AddedComplexity, which would provide
+// more fine-grained control than option 1. This would be beneficial
+// if we find situations where Altivec is really preferred over VSX.
+def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "veqv $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (vnot_ppc (xor v4i32:$vA, v4i32:$vB)))]>;
+def VNAND : VXForm_1<1412, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vnand $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (vnot_ppc (and v4i32:$vA, v4i32:$vB)))]>;
+} // isCommutable
+
+def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vorc $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (or v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
+
+// i64 element comparisons.
+def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
+def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
+def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
+def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+
+// The cryptography instructions that do not require Category:Vector.Crypto
+def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
+ int_ppc_altivec_crypto_vpmsumb, v16i8>;
+def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
+ int_ppc_altivec_crypto_vpmsumh, v8i16>;
+def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
+ int_ppc_altivec_crypto_vpmsumw, v4i32>;
+def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
+ int_ppc_altivec_crypto_vpmsumd, v2i64>;
+def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
+ int_ppc_altivec_crypto_vpermxor, v16i8>;
+
+// Vector doubleword integer pack and unpack.
+def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss,
+ v4i32, v2i64>;
+def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus,
+ v4i32, v2i64>;
+def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpkudum $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
+ v4i32, v2i64>;
+def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
+ v2i64, v4i32>;
+def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
+ v2i64, v4i32>;
+
+// Shuffle patterns for unary and swapped (LE) vector pack modulo.
+def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
+ (VPKUDUM $vA, $vA)>;
+def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUDUM $vB, $vA)>;
+
+def VGBBD : VX2_Int_Ty2<1292, "vgbbd", int_ppc_altivec_vgbbd, v16i8, v16i8>;
+def VBPERMQ : VX1_Int_Ty2<1356, "vbpermq", int_ppc_altivec_vbpermq,
+ v2i64, v16i8>;
+} // end HasP8Altivec
+
+// Crypto instructions (from builtins)
+let Predicates = [HasP8Crypto] in {
+def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
+ int_ppc_altivec_crypto_vshasigmaw, v4i32>;
+def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
+ int_ppc_altivec_crypto_vshasigmad, v2i64>;
+def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
+ v2i64>;
+def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
+ int_ppc_altivec_crypto_vcipherlast, v2i64>;
+def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
+ int_ppc_altivec_crypto_vncipher, v2i64>;
+def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
+ int_ppc_altivec_crypto_vncipherlast, v2i64>;
+def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
+} // HasP8Crypto
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 0000000..cf71b1c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference uses the FrameIndex in place of a real base register until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
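+/// A minimal illustrative use (MBB, MI, DL, TII, DestReg and FI are
+/// placeholders, not names defined in this file):
+///   addFrameReference(BuildMI(MBB, MI, DL, TII.get(PPC::LWZ), DestReg), FI);
+/// appends an immediate displacement of 0 followed by the frame-index operand;
+/// the frame index is rewritten to a real base register and offset later in
+/// code generation.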
+static inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 0000000..4e03ed2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,1675 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+ let Size = 4;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+
+ // Fields used for relation models.
+ string BaseName = "";
+
+ // For cases where multiple instruction definitions really represent the
+ // same underlying instruction but with one definition for 64-bit arguments
+ // and one for 32-bit arguments, this bit breaks the degeneracy between
+ // the two forms and allows TableGen to generate mapping tables.
+ bit Interpretation64Bit = 0;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+// Two joined instructions; used to emit two adjacent instructions as one.
+// The itinerary from the first instruction is used for scheduling and
+// classification.
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : Instruction {
+ field bits<64> Inst;
+ field bits<64> SoftFail = 0;
+ let Size = 8;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode1;
+ let Inst{32-37} = opcode2;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+
+ // Fields used for relation models.
+ string BaseName = "";
+ bit Interpretation64Bit = 0;
+}
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+ string asmstr>
+ : BForm<opcode, aa, lk, OOL, IOL, asmstr> {
+ let BIBO{4-0} = bo;
+ let BIBO{6-5} = 0;
+ let CR = 0;
+}
+
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+class BForm_3<bits<6> opcode, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.3 SC-Form
+class SCForm<bits<6> opcode, bits<1> xo,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<7> LEV;
+
+ let Pattern = pattern;
+
+ let Inst{20-26} = LEV;
+ let Inst{30} = xo;
+}
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = Addr{20-16}; // Base Reg
+ let Inst{16-31} = Addr{15-0}; // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
+
+ // Even though ADDICo does not really have an RC bit, provide
+ // the declaration of one here so that isDOT has something to set.
+ bit RC = 0;
+}
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = R;
+ let B = R;
+ let C = 0;
+}
+
+class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = A;
+ let Inst{43-47} = Addr{20-16}; // Base Reg
+ let Inst{48-63} = Addr{15-0}; // Displacement
+}
+
+// This is used to emit BL8+NOP.
+class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : IForm_and_DForm_1<opcode1, aa, lk, opcode2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = DS_RA{18-14}; // Register #
+ let Inst{16-29} = DS_RA{13-0}; // Displacement.
+ let Inst{30-31} = xo;
+}
+
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_tlb<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> {
+ let RST = 0;
+}
+
+class XForm_attn<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Inst{21-30} = xo;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+}
+
+class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+}
+
+class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<1> WS;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{20} = WS;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_icbt<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<4> CT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6} = 0;
+ let Inst{7-10} = CT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_sr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<4> SR;
+
+ let Inst{6-10} = RS;
+ let Inst{12-15} = SR;
+ let Inst{21-30} = xo;
+}
+
+class XForm_mbar<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> MO;
+
+ let Inst{6-10} = MO;
+ let Inst{21-30} = xo;
+}
+
+class XForm_srin<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<5> RB;
+
+ let Inst{6-10} = RS;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+}
+
+class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<1> L;
+
+ let Inst{6-10} = RS;
+ let Inst{15} = L;
+ let Inst{21-30} = xo;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// Used for QPX
+class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+}
+
+class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<4> tttt;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-24} = tttt;
+ let Inst{25-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<2> L;
+
+ let Pattern = pattern;
+ let Inst{6-8} = 0;
+ let Inst{9-10} = L;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_eieio<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XForm_24_sync<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let L = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+}
+
+class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit R;
+
+ bit RC = 1;
+
+ let Inst{6-9} = 0;
+ let Inst{10} = R;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit A;
+
+ bit RC = 1;
+
+ let Inst{6} = A;
+ let Inst{7-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{7-9} = 0;
+ let Inst{10} = L;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// XX*-Form (VSX)
+class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
+class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
+class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-13} = 0;
+ let Inst{14-15} = D;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = 0;
+ let Inst{22-23} = D;
+ let Inst{24-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = RC;
+ let Inst{22-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-25} = XC{4-0};
+ let Inst{26-27} = xo;
+ let Inst{28} = XC{5};
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class DCB_Form_hint<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> TH;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = TH;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{4-0};
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+ let BH = 0;
+}
+
+class XLForm_2_br2<bits<6> opcode, bits<10> xo, bits<5> bo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BH = 0;
+}
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_4<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bit W;
+ bits<4> U;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-14} = 0;
+ let Inst{15} = W;
+ let Inst{16-19} = U;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XLForm_S<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<1> S;
+
+ let Pattern = pattern;
+
+ let Inst{6-19} = 0;
+ let Inst{20} = S;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk,
+ bits<6> opcode2, bits<2> xo2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo1;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = RST;
+ let Inst{43-47} = DS_RA{18-14}; // Register #
+ let Inst{48-61} = DS_RA{13-0}; // Displacement.
+ let Inst{62-63} = xo2;
+}
+
+class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1,
+ bits<5> bo, bits<5> bi, bit lk,
+ bits<6> opcode2, bits<2> xo2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XLForm_2_and_DSForm_1<opcode1, xo1, lk, opcode2, xo2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
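+
+// Note: XFXForm_1 above encodes the 10-bit SPR number with its two 5-bit
+// halves swapped relative to a plain immediate: the low five bits of the SPR
+// number occupy Inst{11-15} and the high five bits occupy Inst{16-20},
+// matching the ISA's mfspr/mtspr encoding. For example, SPR 256 (VRSAVE)
+// yields Inst{11-15} = 0 and Inst{16-20} = 8.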
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_3p<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> Entry;
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = Entry;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> rS;
+
+ let Inst{6-10} = rS;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> rT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = rT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+ bits<8> FLM;
+ bit W;
+ bits<5> FRB;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6} = L;
+ let Inst{7-14} = FLM;
+ let Inst{15} = W;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
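+
+// Note: in XSForm_1 the 6-bit shift amount is split, with SH{4-0} in
+// Inst{16-20} and the high bit SH{5} in Inst{30}; the extra bit extends the
+// range to 0-63 for the 64-bit shifts such as sradi.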
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+ bits<5> COND;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = COND;
+ let Inst{26-30} = xo;
+ let Inst{31} = 0;
+}
+
+// Used for QPX
+class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+ let FRC = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
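+
+// Note: VXForm_setzero ties VA and VB to VD, so a single register operand
+// produces the vX,vX,vX form; presumably this is for zeroing idioms such as
+// vxor vD,vD,vD.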
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX" register fields.
+class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<1> ST;
+ bits<4> SIX;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16} = ST;
+ let Inst{17-20} = SIX;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0" register fields, like vsbox.
+class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+// Z23-Form (used by QPX)
+class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<2> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
+class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<12> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let isCodeGenOnly = 1;
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
new file mode 100644
index 0000000..6c4e212
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -0,0 +1,172 @@
+//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hardware Transactional Memory extension to the
+// PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+
+def HTM_get_imm : SDNodeXForm<imm, [{
+ return getI32Imm (N->getZExtValue(), SDLoc(N));
+}]>;
+
+let hasSideEffects = 1, usesCustomInserter = 1 in {
+def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+}
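+
+// TCHECK_RET is a codegen-only pseudo: usesCustomInserter = 1 means it is
+// expanded by the target's EmitInstrWithCustomInserter() hook rather than
+// emitted directly, and hasSideEffects = 1 keeps it from being dropped or
+// reordered as a dead definition.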
+
+
+let Predicates = [HasHTM] in {
+
+def TBEGIN : XForm_htm0 <31, 654,
+ (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+
+def TEND : XForm_htm1 <31, 686,
+ (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+
+def TABORT : XForm_base_r3xo <31, 910,
+ (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ []>, isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TABORTWC : XForm_base_r3xo <31, 782,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTWCI : XForm_base_r3xo <31, 846,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDC : XForm_base_r3xo <31, 814,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDCI : XForm_base_r3xo <31, 878,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TSR : XForm_htm2 <31, 750,
+ (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
+
+
+def TRECLAIM : XForm_base_r3xo <31, 942,
+ (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TRECHKPT : XForm_base_r3xo <31, 1006,
+ (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+// Builtins
+
+// All HTM instructions, with the exception of tcheck, set CR0 with the
+// value of the MSR Transaction State (TS) bits that exist before the
+// instruction is executed. For tbegin., the EQ bit in CR0 can be used
+// to determine whether the transaction was successfully started (0) or
+// failed (1). We use an XORI pattern to 'flip' the bit to match the
+// tbegin builtin API which defines a return value of 1 as success.
+
+def : Pat<(int_ppc_tbegin i32:$R),
+ (XORI
+ (EXTRACT_SUBREG (
+ TBEGIN (HTM_get_imm imm:$R)), sub_eq),
+ 1)>;
+
+def : Pat<(int_ppc_tend i32:$R),
+ (TEND (HTM_get_imm imm:$R))>;
+
+
+def : Pat<(int_ppc_tabort i32:$R),
+ (TABORT $R)>;
+
+def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tcheck),
+ (TCHECK_RET)>;
+
+def : Pat<(int_ppc_treclaim i32:$RA),
+ (TRECLAIM $RA)>;
+
+def : Pat<(int_ppc_trechkpt),
+ (TRECHKPT)>;
+
+def : Pat<(int_ppc_tsr i32:$L),
+ (TSR (HTM_get_imm imm:$L))>;
+
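+// The HTM special-purpose registers are accessed directly with mfspr/mtspr;
+// the SPR numbers used below are TFHAR = 128, TFIAR = 129, TEXASR = 130 and
+// TEXASRU = 131 (ISA 2.07).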
+def : Pat<(int_ppc_get_texasr),
+ (MFSPR8 130)>;
+
+def : Pat<(int_ppc_get_texasru),
+ (MFSPR8 131)>;
+
+def : Pat<(int_ppc_get_tfhar),
+ (MFSPR8 128)>;
+
+def : Pat<(int_ppc_get_tfiar),
+ (MFSPR8 129)>;
+
+
+def : Pat<(int_ppc_set_texasr i64:$V),
+ (MTSPR8 130, $V)>;
+
+def : Pat<(int_ppc_set_texasru i64:$V),
+ (MTSPR8 131, $V)>;
+
+def : Pat<(int_ppc_set_tfhar i64:$V),
+ (MTSPR8 128, $V)>;
+
+def : Pat<(int_ppc_set_tfiar i64:$V),
+ (MTSPR8 129, $V)>;
+
+
+// Extended mnemonics
+def : Pat<(int_ppc_tendall),
+ (TEND 1)>;
+
+def : Pat<(int_ppc_tresume),
+ (TSR 1)>;
+
+def : Pat<(int_ppc_tsuspend),
+ (TSR 0)>;
+
+def : Pat<(i64 (int_ppc_ttest)),
+ (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+
+} // [HasHTM]
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 0000000..c17603a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,1828 @@
+//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCHazardRecognizers.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-instr-info"
+
+#define GET_INSTRMAP_INFO
+#define GET_INSTRINFO_CTOR_DTOR
+#include "PPCGenInstrInfo.inc"
+
+static cl::
+opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
+ cl::desc("Disable analysis for CTR loops"));
+
+static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
+cl::desc("Disable compare instruction optimization"), cl::Hidden);
+
+static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
+cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
+cl::Hidden);
+
+static cl::opt<bool>
+UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
+ cl::desc("Use the old (incorrect) instruction latency calculation"));
+
+// Pin the vtable to this file.
+void PPCInstrInfo::anchor() {}
+
+PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(STI), RI(STI.getTargetMachine()) {}
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
+ Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
+ const InstrItineraryData *II =
+ static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG);
+ }
+
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
+}
+
+/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
+/// to use for this target when scheduling the DAG.
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
+ return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
+
+ // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
+ Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
+ assert(DAG->TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*DAG);
+ }
+
+ return new ScoreboardHazardRecognizer(II, DAG);
+}
+
+unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (!ItinData || UseOldLatencyCalc)
+ return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
+
+ // The default implementation of getInstrLatency calls getStageLatency, but
+ // getStageLatency does not do the right thing for us. While we have
+ // itinerary data, most cores are fully pipelined, and so the itineraries only
+ // express the first part of the pipeline, not every stage. Instead, we need
+ // to use the listed output operand cycle number (using operand 0 here, which
+ // is an output).
+
+ unsigned Latency = 1;
+ unsigned DefClass = MI->getDesc().getSchedClass();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
+ continue;
+
+ int Cycle = ItinData->getOperandCycle(DefClass, i);
+ if (Cycle < 0)
+ continue;
+
+ Latency = std::max(Latency, (unsigned) Cycle);
+ }
+
+ return Latency;
+}
+
+int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
+ UseMI, UseIdx);
+
+ if (!DefMI->getParent())
+ return Latency;
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+
+ bool IsRegCR;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const MachineRegisterInfo *MRI =
+ &DefMI->getParent()->getParent()->getRegInfo();
+ IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
+ } else {
+ IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
+ PPC::CRBITRCRegClass.contains(Reg);
+ }
+
+ if (UseMI->isBranch() && IsRegCR) {
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
+
+ // On some cores, there is an additional delay between writing to a condition
+ // register, and using it from a branch.
+ unsigned Directive = Subtarget.getDarwinDirective();
+ switch (Directive) {
+ default: break;
+ case PPC::DIR_7400:
+ case PPC::DIR_750:
+ case PPC::DIR_970:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ Latency += 2;
+ break;
+ }
+ }
+
+ return Latency;
+}
+
+// This function does not list all associative and commutative operations, but
+// only those worth feeding through the machine combiner in an attempt to
+// reduce the critical path. Mostly, this means floating-point operations,
+// because they have high latencies (compared to other operations, such as
+// and/or, which are also associative and commutative, but have low latencies).
+bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ // FP Add:
+ case PPC::FADD:
+ case PPC::FADDS:
+ // FP Multiply:
+ case PPC::FMUL:
+ case PPC::FMULS:
+ // Altivec Add:
+ case PPC::VADDFP:
+ // VSX Add:
+ case PPC::XSADDDP:
+ case PPC::XVADDDP:
+ case PPC::XVADDSP:
+ case PPC::XSADDSP:
+ // VSX Multiply:
+ case PPC::XSMULDP:
+ case PPC::XVMULDP:
+ case PPC::XVMULSP:
+ case PPC::XSMULSP:
+ // QPX Add:
+ case PPC::QVFADD:
+ case PPC::QVFADDS:
+ case PPC::QVFADDSs:
+ // QPX Multiply:
+ case PPC::QVFMUL:
+ case PPC::QVFMULS:
+ case PPC::QVFMULSs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool PPCInstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ // Using the machine combiner in this way is potentially expensive, so
+ // restrict to when aggressive optimizations are desired.
+ if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
+ return false;
+
+ // FP reassociation is only legal when we don't need strict IEEE semantics.
+ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
+ return false;
+
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+}
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SubIdx = PPC::sub_32;
+ return true;
+ }
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ // Note: This list must be kept consistent with LoadRegFromStackSlot.
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ case PPC::RESTORE_CR:
+ case PPC::RESTORE_CRBIT:
+ case PPC::LVX:
+ case PPC::LXVD2X:
+ case PPC::QVLFDX:
+ case PPC::QVLFSXs:
+ case PPC::QVLFDXb:
+ case PPC::RESTORE_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ // Note: This list must be kept consistent with StoreRegToStackSlot.
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ case PPC::SPILL_CR:
+ case PPC::SPILL_CRBIT:
+ case PPC::STVX:
+ case PPC::STXVD2X:
+ case PPC::QVSTFDX:
+ case PPC::QVSTFSXs:
+ case PPC::QVSTFDXb:
+ case PPC::SPILL_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI &&
+ MI->getOpcode() != PPC::RLWIMIo)
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
+ // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
+ // changing the relative order of the mask operands might change what happens
+ // to the high bits of the mask (and, thus, the result).
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return nullptr;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
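+ // For example, with MB = 0 and ME = 15, mask(MB,ME) selects the upper
+ // halfword; mask((ME+1)&31, (MB-1)&31) = mask(16,31) selects the lower
+ // halfword, i.e. exactly the complement, so swapping Op1 and Op2 while
+ // inverting the mask leaves the result unchanged.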
+
+ // Swap op1/op2
+ assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
+ "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ unsigned SubReg1 = MI->getOperand(1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(2).getSubReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ // If machine instrs are no longer in two-address forms, update
+ // destination register as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ // We can't commute a trivial mask (there is no way to represent an all-zero
+ // mask).
+ if (MB == 0 && ME == 31)
+ return nullptr;
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0) {
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(0).setSubReg(SubReg2);
+ }
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setSubReg(SubReg1);
+ MI->getOperand(1).setSubReg(SubReg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
+
+bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ // For VSX A-Type FMA instructions, it is the first two operands that can be
+ // commuted; however, because the non-encoded tied input operand is listed
+ // first, the operands to swap are actually the second and third.
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+
+ // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
+ // and SrcOpIdx2.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
+}
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // This function is used for scheduling, and the nop wanted here is the type
+ // that terminates dispatch groups on the POWER cores.
+ unsigned Directive = Subtarget.getDarwinDirective();
+ unsigned Opcode;
+ switch (Directive) {
+ default: Opcode = PPC::NOP; break;
+ case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
+ case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+ case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
+ }
+
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Opcode));
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(PPC::NOP);
+}
+
+// Branch analysis.
+// Note: If the condition register is set to CTR or CTR8 then this is a
+// BDNZ (imm == 1) or BDZ (imm == 0) branch.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ bool isPPC64 = Subtarget.isPPC64();
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return false;
+
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BC) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCn) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDNZ8 ||
+ LastInst->getOpcode() == PPC::BDNZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDZ8 ||
+ LastInst->getOpcode() == PPC::BDZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ }
+
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with PPC::B and PPC::BCC, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BCn &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDNZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two PPC::Bs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return 0;
+
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ bool isPPC64 = Subtarget.isPPC64();
+
+ // One-way branch.
+ if (!FBB) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+// Select analysis.
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ if (!Subtarget.hasISEL())
+ return false;
+
+ if (Cond.size() != 2)
+ return false;
+
+ // If this is really a bdnz-like condition, then it cannot be turned into a
+ // select.
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // isel is for regular integer GPRs only.
+ if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
+ !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
+ !PPC::G8RCRegClass.hasSubClassEq(RC) &&
+ !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
+ return false;
+
+ // FIXME: These numbers are for the A2; how well they work for other cores is
+ // an open question. On the A2, the isel instruction has a 2-cycle latency
+ // but single-cycle throughput. These numbers are used in combination with
+ // the MispredictPenalty setting from the active SchedMachineModel.
+ CondCycles = 1;
+ TrueCycles = 1;
+ FalseCycles = 1;
+
+ return true;
+}
+
+void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc dl,
+ unsigned DestReg, ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ assert(Cond.size() == 2 &&
+ "PPC branch conditions have two components!");
+
+ assert(Subtarget.hasISEL() &&
+ "Cannot insert select on target without ISEL support");
+
+ // Get the register classes.
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ assert(RC && "TrueReg and FalseReg must have overlapping register classes");
+
+ bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
+ assert((Is64Bit ||
+ PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
+ "isel is for regular integer GPRs only");
+
+ unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
+ unsigned SelectPred = Cond[0].getImm();
+
+ unsigned SubIdx;
+ bool SwapOps;
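+ // isel selects its first register operand when the tested CR bit is set, so
+ // predicates that test for a clear bit (NE, GE, LE, NU, BIT_UNSET) reuse the
+ // subregister of their set-bit counterpart and simply swap the true/false
+ // operands below.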
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
+ case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
+ }
+
+ unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
+ SecondReg = SwapOps ? TrueReg : FalseReg;
+
+ // The first input register of isel cannot be r0. If it is a member
+ // of a register class that can be r0, then copy it first (the
+ // register allocator should eliminate the copy).
+ if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
+ MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
+ const TargetRegisterClass *FirstRC =
+ MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
+ &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
+ unsigned OldFirstReg = FirstReg;
+ FirstReg = MRI.createVirtualRegister(FirstRC);
+ BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
+ .addReg(OldFirstReg);
+ }
+
+ BuildMI(MBB, MI, dl, get(OpCode), DestReg)
+ .addReg(FirstReg).addReg(SecondReg)
+ .addReg(Cond[1].getReg(), 0, SubIdx);
+}
+
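+// Map a CR bit register to its distance from the least-significant (SO) bit
+// of its 4-bit CR field: LT = 3, GT = 2, EQ = 1, UN/SO = 0. copyPhysReg below
+// uses (4 - value) to compute the rotate amount that brings the bit down to
+// the low bit of the GPR.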
+static unsigned getCRBitValue(unsigned CRBit) {
+ unsigned Ret = 4;
+ if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
+ CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
+ CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
+ CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
+ Ret = 3;
+ if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
+ CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
+ CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
+ CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
+ Ret = 2;
+ if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
+ CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
+ CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
+ CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
+ Ret = 1;
+ if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
+ CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
+ CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
+ CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
+ Ret = 0;
+
+ assert(Ret != 4 && "Invalid CR bit register");
+ return Ret;
+}
+
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // We can end up with self copies and similar things as a result of VSX copy
+ // legalization. Promote them here.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (PPC::F8RCRegClass.contains(DestReg) &&
+ PPC::VSRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(DestReg) &&
+ PPC::VSRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::F8RCRegClass.contains(SrcReg) &&
+ PPC::VSRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(SrcReg) &&
+ PPC::VSRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ }
+
+ // Different class register copy
+ if (PPC::CRBITRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ unsigned CRReg = getCRFromCRBit(SrcReg);
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+ .addReg(CRReg, getKillRegState(KillSrc));
+ // Rotate the CR bit in the CR fields to be the least significant bit and
+ // then mask with 0x1 (MB = ME = 31).
+ BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
+ .addImm(31)
+ .addImm(31);
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::G8RCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
+ // There are two different ways this can be done:
+ // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
+ // issue in VSU pipeline 0.
+ // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
+ // can go to either pipeline.
+ // We'll always use xxlor here, because in practically all cases where
+ // copies are generated, they are close enough to some use that the
+ // lower-latency form is preferable.
+ Opc = PPC::XXLOR;
+ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
+ PPC::VSSRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::XXLORf;
+ else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMR;
+ else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRs;
+ else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRb;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const MCInstrDesc &MCID = get(Opc);
+ if (MCID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, MCID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+// This function returns true if a CR spill is necessary and false otherwise.
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional store instructions are added here,
+ // update isStoreToStackSlot.
+
+ DebugLoc DL;
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
+ FuncInfo->setSpillsCR();
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+bool
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional load instructions are added here,
+ // update isLoadFromStackSlot.
+
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CRBIT), DestReg),
+ FrameIdx));
+ return true;
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
+ FuncInfo->setSpillsCR();
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
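+  // Cond[0] is the predicate code (or, for CTR-based branches, nonzero for
+  // branch-if-CTR-nonzero) and Cond[1] is the CR or CTR register.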
+ if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
+ Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
+ else
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ // For some instructions, it is legal to fold ZERO into the RA register field.
+ // A zero immediate should always be loaded with a single li.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ return false;
+ if (DefMI->getOperand(1).getImm() != 0)
+ return false;
+
+  // Note that we cannot invert the arguments of an isel here in order to fold
+ // a ZERO into what is presented as the second argument. All we have here
+ // is the condition bit, and that might come from a CR-logical bit operation.
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+
+ // Only fold into real machine instructions.
+ if (UseMCID.isPseudo())
+ return false;
+
+ unsigned UseIdx;
+ for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx)
+ if (UseMI->getOperand(UseIdx).isReg() &&
+ UseMI->getOperand(UseIdx).getReg() == Reg)
+ break;
+
+ assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI");
+ assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
+
+ const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx];
+
+ // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
+ // register (which might also be specified as a pointer class kind).
+ if (UseInfo->isLookupPtrRegClass()) {
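+    // Pointer-class kind 1 is the variant that excludes R0/X0 (ptr_rc_nor0).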
+ if (UseInfo->RegClass /* Kind */ != 1)
+ return false;
+ } else {
+ if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
+ UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
+ return false;
+ }
+
+ // Make sure this is not tied to an output register (or otherwise
+ // constrained). This is true for ST?UX registers, for example, which
+ // are tied to their output registers.
+ if (UseInfo->Constraints != 0)
+ return false;
+
+ unsigned ZeroReg;
+ if (UseInfo->isLookupPtrRegClass()) {
+ bool isPPC64 = Subtarget.isPPC64();
+ ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
+ } else {
+ ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
+ PPC::ZERO8 : PPC::ZERO;
+ }
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI->getOperand(UseIdx).setReg(ZeroReg);
+
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+}
+
+static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I)
+ if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
+ return true;
+ return false;
+}
+
+// We should make sure that, if we're going to predicate both sides of a
+// condition (a diamond), both sides don't define the counter register. We
+// can predicate counter-decrement-based branches, but while that predicates
+// the branching, it does not predicate the counter decrement. If we tried to
+// merge both sides into one predicated block, we'd decrement the counter
+// twice.
+bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ BranchProbability Probability) const {
+ return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
+}
+
+
+bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // The predicated branches are identified by their type, not really by the
+ // explicit presence of a predicate. Furthermore, some of them can be
+  // predicated more than once. Because if-conversion won't try to predicate
+  // any instruction which already claims to be predicated (by returning true
+  // here), we always return false. In doing so, we let isPredicable() be the
+  // final word on whether or not the instruction can be (further) predicated.
+
+ return false;
+}
+
+bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator())
+ return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+
+ return !isPredicated(MI);
+}
+
+bool PPCInstrInfo::PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Pred) const {
+ unsigned OpC = MI->getOpcode();
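+  // Rewrite the branch opcode into its predicated form: a CTR-decrement form,
+  // a single-CR-bit form (BC/BCn and friends), or a full condition-code form
+  // (BCC and friends).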
+ if (OpC == PPC::BLR || OpC == PPC::BLR8) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+ bool isPPC64 = Subtarget.isPPC64();
+ MI->setDesc(get(Pred[0].getImm() ?
+ (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
+ (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MI->setDesc(get(PPC::BCLR));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(PPC::BCLRn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else {
+ MI->setDesc(get(PPC::BCCLR));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ }
+
+ return true;
+ } else if (OpC == PPC::B) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+ bool isPPC64 = Subtarget.isPPC64();
+ MI->setDesc(get(Pred[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BC));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BCn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ } else {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BCC));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ }
+
+ return true;
+ } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
+ OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
+ llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
+
+ bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
+ bool isPPC64 = Subtarget.isPPC64();
+
+ if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
+ (setLR ? PPC::BCCTRL : PPC::BCCTR)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) :
+ (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) :
+ (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
+ assert(Pred1.size() == 2 && "Invalid PPC first predicate");
+ assert(Pred2.size() == 2 && "Invalid PPC second predicate");
+
+ if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
+ return false;
+ if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
+ return false;
+
+ // P1 can only subsume P2 if they test the same condition register.
+ if (Pred1[1].getReg() != Pred2[1].getReg())
+ return false;
+
+ PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
+ PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
+
+ if (P1 == P2)
+ return true;
+
+ // Does P1 subsume P2, e.g. GE subsumes GT.
+ if (P1 == PPC::PRED_LE &&
+ (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
+ return true;
+ if (P1 == PPC::PRED_GE &&
+ (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
+ return true;
+
+ return false;
+}
+
+bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+  // Note: At the present time, the contents of Pred from this function are
+  // unused by IfConversion. This implementation follows ARM by pushing the
+  // CR-defining operand. Because 'DZ' and 'DNZ' count as types of
+ // predicate, instructions defining CTR or CTR8 are also included as
+ // predicate-defining instructions.
+
+ const TargetRegisterClass *RCs[] =
+ { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
+ &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
+ const TargetRegisterClass *RC = RCs[c];
+ if (MO.isReg()) {
+ if (MO.isDef() && RC->contains(MO.getReg())) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ } else if (MO.isRegMask()) {
+ for (TargetRegisterClass::iterator I = RC->begin(),
+ IE = RC->end(); I != IE; ++I)
+ if (MO.clobbersPhysReg(*I)) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+ }
+ }
+
+ return Found;
+}
+
+bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
+ unsigned OpC = MI->getOpcode();
+ switch (OpC) {
+ default:
+ return false;
+ case PPC::B:
+ case PPC::BLR:
+ case PPC::BLR8:
+ case PPC::BCTR:
+ case PPC::BCTR8:
+ case PPC::BCTRL:
+ case PPC::BCTRL8:
+ return true;
+ }
+}
+
+bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ switch (Opc) {
+ default: return false;
+ case PPC::CMPWI:
+ case PPC::CMPLWI:
+ case PPC::CMPDI:
+ case PPC::CMPLDI:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ Mask = 0xFFFF;
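+    // Immediate compares take a 16-bit immediate; report a 16-bit mask.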
+ return true;
+ case PPC::CMPW:
+ case PPC::CMPLW:
+ case PPC::CMPD:
+ case PPC::CMPLD:
+ case PPC::FCMPUS:
+ case PPC::FCMPUD:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+ }
+}
+
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ if (DisableCmpOpt)
+ return false;
+
+ int OpC = CmpInstr->getOpcode();
+ unsigned CRReg = CmpInstr->getOperand(0).getReg();
+
+  // FP record forms set CR1 based on the exception status bits, not a
+ // comparison with zero.
+ if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
+ return false;
+
+ // The record forms set the condition register based on a signed comparison
+ // with zero (so says the ISA manual). This is not as straightforward as it
+ // seems, however, because this is always a 64-bit comparison on PPC64, even
+ // for instructions that are 32-bit in nature (like slw for example).
+ // So, on PPC32, for unsigned comparisons, we can use the record forms only
+ // for equality checks (as those don't depend on the sign). On PPC64,
+ // we are restricted to equality for unsigned 64-bit comparisons and for
+ // signed 32-bit comparisons the applicability is more restricted.
+ bool isPPC64 = Subtarget.isPPC64();
+ bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
+ bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
+ bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+ int MIOpC = MI->getOpcode();
+
+ bool equalityOnly = false;
+ bool noSub = false;
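+  // When noSub is set, the defining instruction already produces a suitably
+  // extended value, so the backward search for an equivalent subtract below
+  // is skipped.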
+ if (isPPC64) {
+ if (is32BitSignedCompare) {
+ // We can perform this optimization only if MI is sign-extending.
+ if (MIOpC == PPC::SRAW || MIOpC == PPC::SRAWo ||
+ MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo ||
+ MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo ||
+ MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo ||
+ MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) {
+ noSub = true;
+ } else
+ return false;
+ } else if (is32BitUnsignedCompare) {
+ // We can perform this optimization, equality only, if MI is
+ // zero-extending.
+ if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
+ MIOpC == PPC::SLW || MIOpC == PPC::SLWo ||
+ MIOpC == PPC::SRW || MIOpC == PPC::SRWo) {
+ noSub = true;
+ equalityOnly = true;
+ } else
+ return false;
+ } else
+ equalityOnly = is64BitUnsignedCompare;
+ } else
+ equalityOnly = is32BitUnsignedCompare;
+
+ if (equalityOnly) {
+ // We need to check the uses of the condition register in order to reject
+ // non-equality comparisons.
+ for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg),
+ IE = MRI->use_instr_end(); I != IE; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->getOpcode() == PPC::BCC) {
+ unsigned Pred = UseMI->getOperand(0).getImm();
+ if (Pred != PPC::PRED_EQ && Pred != PPC::PRED_NE)
+ return false;
+ } else if (UseMI->getOpcode() == PPC::ISEL ||
+ UseMI->getOpcode() == PPC::ISEL8) {
+ unsigned SubIdx = UseMI->getOperand(3).getSubReg();
+ if (SubIdx != PPC::sub_eq)
+ return false;
+ } else
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator I = CmpInstr;
+
+ // Scan forward to find the first use of the compare.
+ for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
+ I != EL; ++I) {
+ bool FoundUse = false;
+ for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg),
+ JE = MRI->use_instr_end(); J != JE; ++J)
+ if (&*J == &*I) {
+ FoundUse = true;
+ break;
+ }
+
+ if (FoundUse)
+ break;
+ }
+
+ // There are two possible candidates which can be changed to set CR[01].
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ MachineInstr *Sub = nullptr;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = nullptr;
+ // FIXME: Conservatively refuse to convert an instruction which isn't in the
+ // same BB as the comparison. This is to allow the check below to avoid calls
+ // (and other explicit clobbers); instead we should really check for these
+ // more explicitly (in at least a few predecessors).
+ else if (MI->getParent() != CmpInstr->getParent() || Value != 0) {
+ // PPC does not have a record-form SUBri.
+ return false;
+ }
+
+ // Search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ --I;
+
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator E = MI,
+ B = CmpInstr->getParent()->begin();
+
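+  // Walk backward from just before that first use toward MI; apart from the
+  // compare itself, nothing in this range may read or clobber CR0, since the
+  // record form will define CR0 at MI's position. Also look for a usable subf.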
+ for (; I != E && !noSub; --I) {
+ const MachineInstr &Instr = *I;
+ unsigned IOpC = Instr.getOpcode();
+
+ if (&*I != CmpInstr && (
+ Instr.modifiesRegister(PPC::CR0, TRI) ||
+ Instr.readsRegister(PPC::CR0, TRI)))
+ // This instruction modifies or uses the record condition register after
+ // the one we want to change. While we could do this transformation, it
+ // would likely not be profitable. This transformation removes one
+ // instruction, and so even forcing RA to generate one move probably
+ // makes it unprofitable.
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
+ OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
+ (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
+ ((Instr.getOperand(1).getReg() == SrcReg &&
+ Instr.getOperand(2).getReg() == SrcReg2) ||
+ (Instr.getOperand(1).getReg() == SrcReg2 &&
+ Instr.getOperand(2).getReg() == SrcReg))) {
+ Sub = &*I;
+ break;
+ }
+
+ if (I == B)
+      // We reached the start of the block without finding a candidate.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
+ int NewOpC = -1;
+ MIOpC = MI->getOpcode();
+ if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+ NewOpC = MIOpC;
+ else {
+ NewOpC = PPC::getRecordFormOpcode(MIOpC);
+ if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
+ NewOpC = MIOpC;
+ }
+
+ // FIXME: On the non-embedded POWER architectures, only some of the record
+ // forms are fast, and we should use only the fast ones.
+
+ // The defining instruction has a record form (or is already a record
+ // form). It is possible, however, that we'll need to reverse the condition
+ // code of the users.
+ if (NewOpC == -1)
+ return false;
+
+ SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+ SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
+ // needs to be updated to be based on SUB. Push the condition code
+ // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
+ // condition code of these operands will be modified.
+ bool ShouldSwap = false;
+ if (Sub) {
+ ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg;
+
+ // The operands to subf are the opposite of sub, so only in the fixed-point
+ // case, invert the order.
+ ShouldSwap = !ShouldSwap;
+ }
+
+ if (ShouldSwap)
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
+ I != IE; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->getOpcode() == PPC::BCC) {
+ PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
+ assert((!equalityOnly ||
+ Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
+ "Invalid predicate for equality-only optimization");
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
+ PPC::getSwappedPredicate(Pred)));
+ } else if (UseMI->getOpcode() == PPC::ISEL ||
+ UseMI->getOpcode() == PPC::ISEL8) {
+ unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
+ assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
+ "Invalid CR bit for equality-only optimization");
+
+ if (NewSubReg == PPC::sub_lt)
+ NewSubReg = PPC::sub_gt;
+ else if (NewSubReg == PPC::sub_gt)
+ NewSubReg = PPC::sub_lt;
+
+ SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
+ NewSubReg));
+ } else // We need to abort on a user we don't understand.
+ return false;
+ }
+
+  // Replace the compare with a copy of CR0 (which the record-form
+  // instruction defines) into the compare's old result register, inserted
+  // just after MI. If the instruction was not previously in record form,
+  // then set the kill flag on CR0.
+ CmpInstr->eraseFromParent();
+
+ MachineBasicBlock::iterator MII = MI;
+ BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
+ get(TargetOpcode::COPY), CRReg)
+ .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
+
+ if (MIOpC != NewOpC) {
+ // We need to be careful here: we're replacing one instruction with
+ // another, and we need to make sure that we get all of the right
+ // implicit uses and defs. On the other hand, the caller may be holding
+ // an iterator to this instruction, and so we can't delete it (this is
+ // specifically the case if this is the instruction directly after the
+ // compare).
+
+ const MCInstrDesc &NewDesc = get(NewOpC);
+ MI->setDesc(NewDesc);
+
+ if (NewDesc.ImplicitDefs)
+ for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs();
+ *ImpDefs; ++ImpDefs)
+ if (!MI->definesRegister(*ImpDefs))
+ MI->addOperand(*MI->getParent()->getParent(),
+ MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (NewDesc.ImplicitUses)
+ for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses();
+ *ImpUses; ++ImpUses)
+ if (!MI->readsRegister(*ImpUses))
+ MI->addOperand(*MI->getParent()->getParent(),
+ MachineOperand::CreateReg(*ImpUses, false, true));
+ }
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
+ PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
+
+ for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
+ SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
+
+ return true;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may occupy. This returns the maximum number of bytes.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+
+ if (Opcode == PPC::INLINEASM) {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ } else if (Opcode == TargetOpcode::STACKMAP) {
+ return MI->getOperand(1).getImm();
+ } else if (Opcode == TargetOpcode::PATCHPOINT) {
+ PatchPointOpers Opers(MI);
+ return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ } else {
+ const MCInstrDesc &Desc = get(Opcode);
+ return Desc.getSize();
+ }
+}
+
+std::pair<unsigned, unsigned>
+PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = PPCII::MO_ACCESS_MASK;
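+  // The access-kind bits form the direct target flag; the remaining bits are
+  // independent bitmask flags.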
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace PPCII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_LO, "ppc-lo"},
+ {MO_HA, "ppc-ha"},
+ {MO_TPREL_LO, "ppc-tprel-lo"},
+ {MO_TPREL_HA, "ppc-tprel-ha"},
+ {MO_DTPREL_LO, "ppc-dtprel-lo"},
+ {MO_TLSLD_LO, "ppc-tlsld-lo"},
+ {MO_TOC_LO, "ppc-toc-lo"},
+ {MO_TLS, "ppc-tls"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace PPCII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_PLT_OR_STUB, "ppc-plt-or-stub"},
+ {MO_PIC_FLAG, "ppc-pic"},
+ {MO_NLP_FLAG, "ppc-nlp"},
+ {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}};
+ return makeArrayRef(TargetFlags);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 0000000..c3c3a48
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,279 @@
+//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
+
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+} // end namespace PPCII
+
+class PPCSubtarget;
+class PPCInstrInfo : public PPCGenInstrInfo {
+ PPCSubtarget &Subtarget;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
+ virtual void anchor();
+
+protected:
+ /// Commutes the operands in the given instruction.
+ /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
+ ///
+  /// Do not call this method for a non-commutable instruction or for a
+  /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
+  /// Even though the instruction is commutable, the method may still
+  /// fail to commute the operands; a null pointer is returned in such cases.
+ ///
+ /// For example, we can commute rlwimi instructions, but only if the
+ /// rotate amt is zero. We also have to munge the immediates a bit.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
+public:
+ explicit PPCInstrInfo(PPCSubtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const override;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const override;
+
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = nullptr) const override;
+
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const override;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const override {
+ return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
+ UseNode, UseIdx);
+ }
+
+ bool hasLowDefLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const override {
+ // Machine LICM should hoist all instructions in low-register-pressure
+ // situations; none are sufficiently free to justify leaving in a loop
+ // body.
+ return false;
+ }
+
+ bool useMachineCombiner() const override {
+ return true;
+ }
+
+ /// Return true when there is potentially a faster code sequence
+ /// for an instruction chain ending in <Root>. All potential patterns are
+ /// output in the <Pattern> array.
+ bool getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P) const override;
+
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const override;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
+
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+
+
+ // Branch analysis.
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const override;
+
+ // Select analysis.
+ bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+ unsigned, unsigned, int &, int &, int &) const override;
+ void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc DL, unsigned DstReg, ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const override;
+
+  // If-conversion by predication (only supported by some branch instructions).
+ // All of the profitability checks always return true; it is always
+ // profitable to use the predicated branches.
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ BranchProbability Probability) const override {
+ return true;
+ }
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ BranchProbability Probability) const override;
+
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override {
+ return true;
+ }
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const override {
+ return false;
+ }
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const override;
+
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const override;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Pred) const override;
+
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
+
+ bool isPredicable(MachineInstr *MI) const override;
+
+ // Comparison optimization.
+
+
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
+
+ bool optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const override;
+
+ /// GetInstSize - Return the number of bytes of code the specified
+  /// instruction may occupy. This returns the maximum number of bytes.
+ ///
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 0000000..6c4364a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,4127 @@
+//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+def tocentry32 : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
+
+def SDT_PPCqvfperm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3>
+]>;
+def SDT_PPCqvgpci : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisInt<1>
+]>;
+def SDT_PPCqvaligni : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
+]>;
+def SDT_PPCqvesplati : SDTypeProfile<1, 2, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
+]>;
+
+def SDT_PPCqbflt : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisVec<1>
+]>;
+
+def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
+def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs : SDNode<"PPCISD::MFFS",
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
+ [SDNPMayLoad, SDNPMemOperand]>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>;
+
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+ [SDNPMayLoad]>;
+def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
+ SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
+def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
+def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
+def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
+def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
+
+def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
+
+def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
+ SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def PPCclrbhrb : SDNode<"PPCISD::CLRBHRB", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCmfbhrbe : SDNode<"PPCISD::MFBHRBE", SDTIntBinOp, [SDNPHasChain]>;
+def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to set/unset CR bit 6 for SVR4 vararg calls
+def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def SDTDynAreaOp : SDTypeProfile<1, 1, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue(), SDLoc(N))
+ : getI32Imm(0, SDLoc(N));
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue(), SDLoc(N));
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16, SDLoc(N));
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+  // Transformation function: get the adjusted high 16 bits, compensating for
+  // the sign of the low 16 bits ((HA16 << 16) + signext(LO16) == imm).
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16, SDLoc(N));
+}]>;
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb, SDLoc(N));
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me, SDLoc(N));
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def imm32SExt16 : Operand<i32>, ImmLeaf<i32, [{
+ // imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit
+ // sign extended field. Used by instructions like 'addi'.
+ return (int32_t)Imm == (short)Imm;
+}]>;
+def imm64SExt16 : Operand<i64>, ImmLeaf<i64, [{
+ // imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit
+ // sign extended field. Used by instructions like 'addi'.
+ return (int64_t)Imm == (short)Imm;
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+  // For 64-bit, make sure it is sign-extended correctly.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
+
+def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{
+ // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit
+ // zero extended field.
+ return isUInt<32>(Imm);
+}]>;
+
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (4-aligned) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries, then the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT { bit RC = 1; }
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+// In the default PowerPC assembler syntax, registers are specified simply
+// by number, so they cannot be distinguished from immediate values (without
+// looking at the opcode). This means that the default operand matching logic
+// for the asm parser does not work, and we need to specify custom matchers.
+// Since those can only be specified with RegisterOperand classes and not
+// directly on the RegisterClass, all instruction patterns used by the asm
+// parser need to use a RegisterOperand (instead of a RegisterClass) for
+// all their register operands.
+// For this purpose, we define one RegisterOperand for each RegisterClass,
+// using the same name as the class, just in lower case.
+
+def PPCRegGPRCAsmOperand : AsmOperandClass {
+ let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
+}
+def gprc : RegisterOperand<GPRC> {
+ let ParserMatchClass = PPCRegGPRCAsmOperand;
+}
+def PPCRegG8RCAsmOperand : AsmOperandClass {
+ let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
+}
+def g8rc : RegisterOperand<G8RC> {
+ let ParserMatchClass = PPCRegG8RCAsmOperand;
+}
+def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
+ let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
+ let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
+}
+def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
+ let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
+}
+def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
+ let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
+}
+def PPCRegF8RCAsmOperand : AsmOperandClass {
+ let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
+}
+def f8rc : RegisterOperand<F8RC> {
+ let ParserMatchClass = PPCRegF8RCAsmOperand;
+}
+def PPCRegF4RCAsmOperand : AsmOperandClass {
+ let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
+}
+def f4rc : RegisterOperand<F4RC> {
+ let ParserMatchClass = PPCRegF4RCAsmOperand;
+}
+def PPCRegVRRCAsmOperand : AsmOperandClass {
+ let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
+}
+def vrrc : RegisterOperand<VRRC> {
+ let ParserMatchClass = PPCRegVRRCAsmOperand;
+}
+def PPCRegCRBITRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
+}
+def crbitrc : RegisterOperand<CRBITRC> {
+ let ParserMatchClass = PPCRegCRBITRCAsmOperand;
+}
+def PPCRegCRRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
+}
+def crrc : RegisterOperand<CRRC> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+def crrc0 : RegisterOperand<CRRC0> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+ let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+ let PrintMethod = "printU1ImmOperand";
+ let ParserMatchClass = PPCU1ImmAsmOperand;
+}
+
+def PPCU2ImmAsmOperand : AsmOperandClass {
+ let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u2imm : Operand<i32> {
+ let PrintMethod = "printU2ImmOperand";
+ let ParserMatchClass = PPCU2ImmAsmOperand;
+}
+
+def PPCU3ImmAsmOperand : AsmOperandClass {
+ let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+ let PrintMethod = "printU3ImmOperand";
+ let ParserMatchClass = PPCU3ImmAsmOperand;
+}
+
+def PPCU4ImmAsmOperand : AsmOperandClass {
+ let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u4imm : Operand<i32> {
+ let PrintMethod = "printU4ImmOperand";
+ let ParserMatchClass = PPCU4ImmAsmOperand;
+}
+def PPCS5ImmAsmOperand : AsmOperandClass {
+ let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+ let ParserMatchClass = PPCS5ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<5>";
+}
+def PPCU5ImmAsmOperand : AsmOperandClass {
+ let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+ let ParserMatchClass = PPCU5ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<5>";
+}
+def PPCU6ImmAsmOperand : AsmOperandClass {
+ let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+ let ParserMatchClass = PPCU6ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<6>";
+}
+def PPCU10ImmAsmOperand : AsmOperandClass {
+ let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u10imm : Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+ let ParserMatchClass = PPCU10ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<10>";
+}
+def PPCU12ImmAsmOperand : AsmOperandClass {
+ let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+ let ParserMatchClass = PPCU12ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<12>";
+}
+def PPCS16ImmAsmOperand : AsmOperandClass {
+ let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addS16ImmOperands";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+def PPCU16ImmAsmOperand : AsmOperandClass {
+ let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
+ let RenderMethod = "addU16ImmOperands";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
+}
+def PPCS17ImmAsmOperand : AsmOperandClass {
+ let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
+ let RenderMethod = "addS16ImmOperands";
+}
+def s17imm : Operand<i32> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
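+// Illustrative note (not part of the original file): with s17imm the asm
+// parser accepts both "lis 3, 0xffff" and "lis 3, -1"; both spellings encode
+// the same 16-bit immediate field, matching GNU assembler behaviour.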
+def PPCDirectBrAsmOperand : AsmOperandClass {
+ let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
+ let RenderMethod = "addBranchTargetOperands";
+}
+def directbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def absdirectbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCondBrAsmOperand : AsmOperandClass {
+ let Name = "CondBr"; let PredicateMethod = "isCondBr";
+ let RenderMethod = "addBranchTargetOperands";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
+ let ParserMatchClass = PPCCondBrAsmOperand;
+}
+def abscondbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsCondBrEncoding";
+ let ParserMatchClass = PPCCondBrAsmOperand;
+}
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def abscalltarget : Operand<iPTR> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCRBitMaskOperand : AsmOperandClass {
+ let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
+ let DecoderMethod = "decodeCRBitMOperand";
+ let ParserMatchClass = PPCCRBitMaskOperand;
+}
+// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def PPCRegGxRCNoR0Operand : AsmOperandClass {
+ let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
+ let ParserMatchClass = PPCRegGxRCNoR0Operand;
+}
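+// Illustrative note (not part of the original file): R0/X0 is excluded here
+// because, in a base-address position, RA=0 selects the literal value 0 rather
+// than the contents of r0 (e.g. "lwz 3, 4(0)" loads from address 4).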
+// A version of ptr_rc usable with the asm parser.
+def PPCRegGxRCOperand : AsmOperandClass {
+ let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
+ let ParserMatchClass = PPCRegGxRCOperand;
+}
+
+def PPCDispRIOperand : AsmOperandClass {
+ let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addS16ImmOperands";
+}
+def dispRI : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIOperand;
+}
+def PPCDispRIXOperand : AsmOperandClass {
+ let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispRIX : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIXOperand;
+}
+def PPCDispSPE8Operand : AsmOperandClass {
+ let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE8 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE8Operand;
+}
+def PPCDispSPE4Operand : AsmOperandClass {
+ let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE4 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE4Operand;
+}
+def PPCDispSPE2Operand : AsmOperandClass {
+ let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE2 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE2Operand;
+}
+
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIEncoding";
+ let DecoderMethod = "decodeMemRIOperands";
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
+}
+def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
+ let DecoderMethod = "decodeMemRIXOperands";
+}
+def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE8DisEncoding";
+}
+def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE4DisEncoding";
+}
+def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE2DisEncoding";
+}
+
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+def PPCTLSRegOperand : AsmOperandClass {
+ let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
+ let RenderMethod = "addTLSRegOperands";
+}
+def tlsreg32 : Operand<i32> {
+ let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
+}
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
+ let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
+}
+
+// Define PowerPC-specific addressing modes.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
+def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
+def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
+def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">;
+def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">;
+def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
+def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
+def IsE500 : Predicate<"PPCSubTarget->isE500()">;
+def HasSPE : Predicate<"PPCSubTarget->hasSPE()">;
+def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
+def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
+def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Multiclass Definitions.
+
+multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
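+// Illustrative note (not part of the original file): an instantiation such as
+// the later "defm AND : XForm_6r<31, 28, ...>" expands into two records, AND
+// (printed "and ...") and ANDo (the record form, printed "and. ..." and
+// additionally defining CR0).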
+
+multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_11<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XForm_11<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+// Multiclass for instructions for which the non-record form is not cracked
+// and the record form is cracked (e.g. divw, mullw, etc.)
+multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ PPC970_DGroup_Cracked;
+ }
+}
+
+multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MForm_2<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MForm_2<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MDForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MDForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MDSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MDSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_26<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_26<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_3<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_3<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
+ [(set i32:$result,
+ (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
+def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
+ [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+ // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+ // because either operand might become the first operand in an isel, and
+ // that operand cannot be r0.
+ def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+ gprc_nor0:$T, gprc_nor0:$F,
+ i32imm:$BROPC), "#SELECT_CC_I4",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+ g8rc_nox0:$T, g8rc_nox0:$F,
+ i32imm:$BROPC), "#SELECT_CC_I8",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_F4",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_F8",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VRRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
+ [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
+ def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
+ [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
+ def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ f4rc:$T, f4rc:$F), "#SELECT_F4",
+ [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
+ def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ f8rc:$T, f8rc:$F), "#SELECT_F8",
+ [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ vrrc:$T, vrrc:$F), "#SELECT_VRRC",
+ [(set v4i32:$dst,
+ (select i1:$cond, v4i32:$T, v4i32:$F))]>;
+}
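+// Illustrative sketch (an assumption, not part of the original file) of the
+// kind of branch sequence the custom inserter builds for
+// SELECT_CC_I4 $dst, $cond, $T, $F, $BROPC:
+//   b<cond from $BROPC> $cond, TrueBB   ; conditional branch on the CR field
+//   copy $F into $dst; branch to JoinBB
+// TrueBB:
+//   copy $T into $dst
+// Because either $T or $F may end up as the first operand of an isel on
+// targets that have it, the gprc_nor0/g8rc_nox0 classes are used above.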
+
+// SPILL_CR - Indicate that we're spilling the CR register, so we'll need to
+// scavenge a register for it.
+let mayStore = 1 in {
+def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
+ "#SPILL_CR", []>;
+def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+ "#SPILL_CRBIT", []>;
+}
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in {
+def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
+ "#RESTORE_CR", []>;
+def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+ "#RESTORE_CRBIT", []>;
+}
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
+ [(retflag)]>, Requires<[In32BitMode]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>;
+ }
+ }
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
+ PPC970_Unit_BRU;
+let Defs = [LR] in
+ def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", IIC_BrB,
+ [(br bb:$dst)]>;
+ def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
+ "ba $dst", IIC_BrB, []>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ let isCodeGenOnly = 1 in {
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
+ /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
+ "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
+
+ let isReturn = 1, Uses = [LR, RM] in
+ def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>;
+ }
+
+ let isCodeGenOnly = 1 in {
+ let Pattern = [(brcond i1:$bi, bb:$dst)] in
+ def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 12, $bi, $dst">;
+
+ let Pattern = [(brcond (not i1:$bi), bb:$dst)] in
+ def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 4, $bi, $dst">;
+
+ let isReturn = 1, Uses = [LR, RM] in
+ def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi),
+ "bclr 12, $bi, 0", IIC_BrB, []>;
+ def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi),
+ "bclr 4, $bi, 0", IIC_BrB, []>;
+ }
+
+ let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", IIC_BrB, []>;
+ def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", IIC_BrB, []>;
+ def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins),
+ "bdzlr+", IIC_BrB, []>;
+ def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins),
+ "bdnzlr+", IIC_BrB, []>;
+ def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins),
+ "bdzlr-", IIC_BrB, []>;
+ def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins),
+ "bdnzlr-", IIC_BrB, []>;
+ }
+
+ let Defs = [CTR], Uses = [CTR] in {
+ def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
+ def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza $dst">;
+ def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza $dst">;
+ def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz+ $dst">;
+ def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz+ $dst">;
+ def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza+ $dst">;
+ def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza+ $dst">;
+ def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz- $dst">;
+ def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz- $dst">;
+ def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza- $dst">;
+ def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza- $dst">;
+ }
+}
+
+// The unconditional BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
+ }
+}
+
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
+
+ let isCodeGenOnly = 1 in {
+ def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
+ "bl $func", IIC_BrB, []>;
+ def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
+ def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
+ "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">;
+
+ def BCL : BForm_4<16, 12, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 12, $bi, $dst">;
+ def BCLn : BForm_4<16, 4, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 4, $bi, $dst">;
+ }
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>;
+ }
+ }
+ let Uses = [LR, RM] in {
+ def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins),
+ "blrl", IIC_BrB, []>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 12, $bi, 0", IIC_BrB, []>;
+ def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 4, $bi, 0", IIC_BrB, []>;
+ }
+ }
+ let Defs = [CTR], Uses = [CTR, RM] in {
+ def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl $dst">;
+ def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl $dst">;
+ def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla $dst">;
+ def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla $dst">;
+ def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl+ $dst">;
+ def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl+ $dst">;
+ def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla+ $dst">;
+ def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla+ $dst">;
+ def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl- $dst">;
+ def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl- $dst">;
+ def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla- $dst">;
+ def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla- $dst">;
+ }
+ let Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins),
+ "bdzlrl", IIC_BrB, []>;
+ def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins),
+ "bdnzlrl", IIC_BrB, []>;
+ def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins),
+ "bdzlrl+", IIC_BrB, []>;
+ def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins),
+ "bdnzlrl+", IIC_BrB, []>;
+ def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins),
+ "bdzlrl-", IIC_BrB, []>;
+ def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins),
+ "bdnzlrl-", IIC_BrB, []>;
+ }
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isCodeGenOnly = 1 in {
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>, Requires<[In32BitMode]>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", IIC_BrB,
+ []>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
+ "ba $dst", IIC_BrB,
+ []>;
+
+}
+
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ let Defs = [CTR] in
+ def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
+
+// System call.
+let PPC970_Unit = 7 in {
+ def SC : SCForm<17, 1, (outs), (ins i32imm:$lev),
+ "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>;
+}
+
+// Branch history rolling buffer.
+def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB,
+ [(PPCclrbhrb)]>,
+ PPC970_DGroup_Single;
+// The $dmy argument used for MFBHRBE is not needed; however, including
+// it avoids automatic generation of PPCFastISel::fastEmit_i(), which
+// interferes with necessary special handling (see PPCFastISel.cpp).
+def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$rD),
+ (ins u10imm:$imm, u10imm:$dmy),
+ "mfbhrbe $rD, $imm", IIC_BrB,
+ [(set i32:$rD,
+ (PPCmfbhrbe imm:$imm, imm:$dmy))]>,
+ PPC970_DGroup_First;
+
+def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm",
+ IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>,
+ PPC970_DGroup_Single;
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
+ IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in {
+def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, memrr:$dst),
+ "dcbt $dst, $TH", IIC_LdStDCBF, []>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
+ "dcbtst $dst, $TH", IIC_LdStDCBF, []>,
+ PPC970_DGroup_Single;
+} // hasSideEffects = 0
+
+def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
+ "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+
+def : Pat<(int_ppc_dcbt xoaddr:$dst),
+ (DCBT 0, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbtst xoaddr:$dst),
+ (DCBTST 0, xoaddr:$dst)>;
+
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
+ (DCBT 0, xoaddr:$dst)>; // data prefetch for loads
+def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
+ (DCBTST 0, xoaddr:$dst)>; // data prefetch for stores
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
+ (ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
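+// Illustrative note (an assumption about the generic prefetch node): its
+// operands after the address are (rw, locality, cachetype), so the patterns
+// above match rw=0/1 for read/write data prefetches and cachetype=0 for the
+// instruction-cache variant, as the trailing comments indicate.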
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
+def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", IIC_LdStLWARX, []>;
+
+// Instructions to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
+
+def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
+
+def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+}
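+// Illustrative sketch (an assumption, not part of the original file): the
+// custom inserter expands a pseudo such as ATOMIC_LOAD_ADD_I32 into a
+// load-reserve/store-conditional retry loop built from the instructions above:
+//   loop: lwarx  rD, 0, rPtr       ; load word and reserve
+//         add    rT, rD, rIncr
+//         stwcx. rT, 0, rPtr       ; store conditional, sets CR0
+//         bne-   loop              ; retry if the reservation was lost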
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
+
+def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm),
+ "twi $to, $rA, $imm", IIC_IntTrapW, []>;
+def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB),
+ "tw $to, $rA, $rB", IIC_IntTrapW, []>;
+def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm),
+ "tdi $to, $rA, $imm", IIC_IntTrapD, []>;
+def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
+ "td $to, $rA, $rB", IIC_IntTrapD, []>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
+ "lbz $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
+ "lha $rD, $src", IIC_LdStLHA,
+ [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
+ "lhz $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
+ "lwz $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
+ "lfs $rD, $src", IIC_LdStLFD,
+ [(set f32:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
+ "lfd $rD, $src", IIC_LdStLFD,
+ [(set f64:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", IIC_LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lfsu $rD, $addr", IIC_LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lfdu $rD, $addr", IIC_LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+
+// Indexed (r+r) Loads with Update (preinc).
+def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lfsux $rD, $addr", IIC_LdStLFDUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lfdux $rD, $addr", IIC_LdStLFDUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src),
+ "lhax $rD, $src", IIC_LdStLHA,
+ [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", IIC_LdStLFD,
+ [(set f32:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", IIC_LdStLFD,
+ [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", IIC_LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
+ "lfiwzx $frD, $src", IIC_LdStLFD,
+ [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
+}
+
+// Load Multiple
+def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
+ "lmw $rD, $src", IIC_LdStLMW, []>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
+ "stb $rS, $src", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
+ "sth $rS, $src", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
+ "stw $rS, $src", IIC_LdStStore,
+ [(store i32:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
+ "stfs $rS, $dst", IIC_LdStSTFD,
+ [(store f32:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
+ "stfd $rS, $dst", IIC_LdStSTFD,
+ [(store f64:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
+ "stfsu $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
+ "stfdu $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
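+// Illustrative note (not part of the original file): e.g. a pre-incremented
+// i32 store selected through the pre_store pattern above becomes
+// "stwu $rS, off($base)", and STWU's extra $ea_res result feeds the updated
+// base address back into the DAG via the "$dst.reg = $ea_res" constraint.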
+
+// Indexed (r+r) Stores.
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbx $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthx $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
+ "stwx $rS, $dst", IIC_LdStStore,
+ [(store i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", IIC_LdStStore,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", IIC_LdStStore,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", IIC_LdStSTFD,
+ [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
+ "stfsx $frS, $dst", IIC_LdStSTFD,
+ [(store f32:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
+ "stfdx $frS, $dst", IIC_LdStSTFD,
+ [(store f64:$frS, xaddr:$dst)]>;
+}
+
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst),
+ "stfsux $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst),
+ "stfdux $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFDUX $rS, $ptrreg, $ptroff)>;
+
+// Store Multiple
+def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
+ "stmw $rS, $dst", IIC_LdStLMW, []>;
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
+ "sync $L", IIC_LdStSync, []>;
+
+let isCodeGenOnly = 1 in {
+ def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "msync", IIC_LdStSync, []> {
+ let L = 0;
+ }
+}
+
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IIC_IntSimple,
+ [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
+let BaseName = "addic" in {
+let Defs = [CARRY] in
+def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
+ RecFormRel, PPC970_DGroup_Cracked;
+let Defs = [CARRY, CR0] in
+def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IIC_IntGeneral,
+ []>, isDOT, RecFormRel;
+}
+def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
+ "addis $rD, $rA, $imm", IIC_IntSimple,
+ [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
+ "la $rD, $sym($rA)", IIC_IntGeneral,
+ [(set i32:$rD, (add i32:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
+ [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
+let Defs = [CARRY] in
+def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
+ "li $rD, $imm", IIC_IntSimple,
+ [(set i32:$rD, imm32SExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm),
+ "lis $rD, $imm", IIC_IntSimple,
+ [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Defs = [CR0] in {
+def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
+ isDOT;
+}
+def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
+
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple,
+ []>;
+let isCodeGenOnly = 1 in {
+// The POWER6 and POWER7 have special group-terminating nops.
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins),
+ "ori 1, 1, 0", IIC_IntSimple, []>;
+def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
+ "ori 2, 2, 0", IIC_IntSimple, []>;
+}
+
+let isCompare = 1, hasSideEffects = 0 in {
+ def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IIC_IntCompare>;
+ def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IIC_IntCompare>;
+}
+}
+
+let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
+let isCommutable = 1 in {
+defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
+defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+} // isCommutable
+defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
+defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
+defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+} // isCommutable
+defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
+defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
+defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+} // isCommutable
+defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "slw", "$rA, $rS, $rB", IIC_IntGeneral,
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
+defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "srw", "$rA, $rS, $rB", IIC_IntGeneral,
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
+defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "sraw", "$rA, $rS, $rB", IIC_IntShift,
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let hasSideEffects = 0 in {
+defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
+ "srawi", "$rA, $rS, $SH", IIC_IntShift,
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
+defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS),
+ "cntlzw", "$rA, $rS", IIC_IntGeneral,
+ [(set i32:$rA, (ctlz i32:$rS))]>;
+defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
+ "extsb", "$rA, $rS", IIC_IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
+defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
+ "extsh", "$rA, $rS", IIC_IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
+
+let isCommutable = 1 in
+def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "cmpb $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>;
+}
+let isCompare = 1, hasSideEffects = 0 in {
+ def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+ "cmpw $crD, $rA, $rB", IIC_IntCompare>;
+ def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+ "cmplw $crD, $rA, $rB", IIC_IntCompare>;
+}
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+let isCompare = 1, hasSideEffects = 0 in {
+ def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+}
+
+let Uses = [RM] in {
+ let hasSideEffects = 0 in {
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiw", "$frD, $frB", IIC_FPGeneral,
+ []>;
+ defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
+ defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
+ "frsp", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fround f64:$frB))]>;
+
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frin", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (frnd f64:$frB))]>;
+ defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frin", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (frnd f32:$frB))]>;
+ }
+
+ let hasSideEffects = 0 in {
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frip", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frip", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
+ "friz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
+ "friz", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frim", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frim", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
+ defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fsqrt", "$frD, $frB", IIC_FPSqrtD,
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
+ defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fsqrts", "$frD, $frB", IIC_FPSqrtS,
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
+ }
+ }
+}
+
+/// Note that FMR is defined as a pseudo-op on the PPC970 because it is often
+/// coalesced away and we don't want the dispatch group builder to think that
+/// it will fill a slot (which could cause the load of an LSU reject to sneak
+/// into a d-group with a store).
+let hasSideEffects = 0 in
+defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fmr", "$frD, $frB", IIC_FPGeneral,
+ []>, // (set f32:$frD, f32:$frB)
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3, hasSideEffects = 0 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fabs f32:$frB))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fabs f64:$frB))]>;
+defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
+defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fneg", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fneg f32:$frB))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fneg", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fneg f64:$frB))]>;
+
+defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
+
+// Reciprocal estimates.
+defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fre", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fres", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frsqrte", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frsqrtes", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
+}
+
+// XL-Form instructions. Condition register logical ops.
+//
+let hasSideEffects = 0 in
+def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
+ "mcrf $BF, $BFA", IIC_BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// FIXME: According to the ISA (section 2.5.1 of version 2.06), the
+// condition-register logical instructions have preferred forms. Specifically,
+// it is preferred that the bit specified by the BT field be in the same
+// condition register as that specified by the bit BB. We might want to account
+// for this via hinting the register allocator and anti-dep breakers, or we
+// could constrain the register class to force this constraint and then loosen
+// it during register allocation via convertToThreeAddress or some similar
+// mechanism.
+
+let isCommutable = 1 in {
+def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>;
+
+def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crnand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>;
+
+def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "cror $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>;
+
+def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crxor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>;
+
+def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crnor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>;
+
+def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "creqv $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>;
+} // isCommutable
+
+def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crandc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>;
+
+def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crorc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
+
+let isCodeGenOnly = 1 in {
+def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
+ "creqv $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 1)]>;
+
+def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
+ "crxor $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 0)]>;
+
+let Defs = [CR1EQ], CRD = 6 in {
+def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
+ "creqv 6, 6, 6", IIC_BrCR,
+ [(PPCcr6set)]>;
+
+def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
+ "crxor 6, 6, 6", IIC_BrCR,
+ [(PPCcr6unset)]>;
+}
+}
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+
+def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mftb $RT, $SPR", IIC_SprMFTB>;
+
+// A pseudo-instruction used to implement the read of the 64-bit cycle counter
+// on a 32-bit target.
+let hasSideEffects = 1, usesCustomInserter = 1 in
+def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins),
+ "#ReadTB", []>;
+
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
+ "mfctr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
+let Pattern = [(int_ppc_mtctr i32:$rS)] in
+def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
+ "mtlr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
+ "mflr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let isCodeGenOnly = 1 in {
+ // Move to/from VRSAVE: despite being an SPR, the VRSAVE register is renamed
+ // like a GPR on the PPC970. As such, copies in and out have the same
+ // performance characteristics as an OR instruction.
+ def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
+ "mtspr 256, $rS", IIC_IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
+ "mfspr $rT, 256", IIC_IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins gprc:$rS),
+ "mtspr 256, $rS", IIC_IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IIC_IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
+let hasSideEffects = 0 in {
+def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
+ "mfcr $rT", IIC_SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasSideEffects = 0
+
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+ def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
+ [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
+
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ let isCodeGenOnly = 1 in
+ def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
+ "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
+ "mffs $rT", IIC_IntMFFS,
+ [(set f64:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+
+ let Defs = [CR1] in
+ def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins),
+ "mffs. $rT", IIC_IntMFFS, []>, isDOT;
+}
+
+
+let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
+// XO-Form instructions. Arithmetic instructions that can set the overflow bit.
+let isCommutable = 1 in
+defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+let isCodeGenOnly = 1 in
+def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple,
+ [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>;
+let isCommutable = 1 in
+defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_Cracked;
+
+defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>;
+defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
+def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
+def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
+let isCommutable = 1 in {
+defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
+defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
+defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mullw", "$rT, $rA, $rB", IIC_IntMulHW,
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+} // isCommutable
+defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
+defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
+ PPC970_DGroup_Cracked;
+defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "neg", "$rT, $rA", IIC_IntSimple,
+ [(set i32:$rT, (ineg i32:$rA))]>;
+let Uses = [CARRY] in {
+let isCommutable = 1 in
+defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
+defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "addme", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
+defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "addze", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
+defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
+defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfme", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
+defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfze", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3, hasSideEffects = 0 in { // FPU Operations.
+let Uses = [RM] in {
+let isCommutable = 1 in {
+ defm FMADD : AForm_1r<63, 29,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+ defm FMADDS : AForm_1r<59, 29,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+ defm FMSUB : AForm_1r<63, 28,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+ defm FMSUBS : AForm_1r<59, 28,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+ defm FNMADD : AForm_1r<63, 31,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+ defm FNMADDS : AForm_1r<59, 31,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+ defm FNMSUB : AForm_1r<63, 30,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
+ defm FNMSUBS : AForm_1r<59, 30,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
+} // isCommutable
+}
+// FSEL is artificially split into 4-byte and 8-byte forms for the result. To
+// avoid needing four variants, force the comparison operands to always be an
+// 8-byte double (code should use an FMRSD if the input comparison value really
+// wants to be a float); only the result and the other operands get 4/8-byte forms.
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FSELD : AForm_1r<63, 23,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
+defm FSELS : AForm_1r<63, 23,
+ (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
+let Uses = [RM] in {
+ let isCommutable = 1 in {
+ defm FADD : AForm_2r<63, 21,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub,
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+ defm FADDS : AForm_2r<59, 21,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ } // isCommutable
+ defm FDIV : AForm_2r<63, 18,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD,
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+ defm FDIVS : AForm_2r<59, 18,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS,
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ let isCommutable = 1 in {
+ defm FMUL : AForm_3r<63, 25,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
+ "fmul", "$FRT, $FRA, $FRC", IIC_FPFused,
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+ defm FMULS : AForm_3r<59, 25,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
+ "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ } // isCommutable
+ defm FSUB : AForm_2r<63, 20,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub,
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+ defm FSUBS : AForm_2r<59, 20,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
+ }
+}
+
+let hasSideEffects = 0 in {
+let PPC970_Unit = 1 in { // FXU Operations.
+ let isSelect = 1 in
+ def ISEL : AForm_4<31, 15,
+ (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
+ "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
+ []>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. Rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
+ (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+}
+let BaseName = "rlwinm" in {
+def RLWINM : MForm_2<21,
+ (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ []>, RecFormRel;
+let Defs = [CR0] in
+def RLWINMo : MForm_2<21,
+ (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ []>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
+}
+defm RLWNM : MForm_2r<23, (outs gprc:$rA),
+ (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
+ []>;
+}
+} // hasSideEffects = 0
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
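+// For example, materializing 0x12345678 expands to "lis rD, 4660" (0x1234 into
+// the high halfword) followed by "ori rD, rD, 22136" (0x5678 into the low halfword).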
+
+// Implement the 'not' operation with the NOR instruction.
+def i32not : OutPatFrag<(ops node:$in),
+ (NOR $in, $in)>;
+def : Pat<(not i32:$in),
+ (i32not $in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add i32:$in, imm:$imm),
+ (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
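+// Note that the ADD pattern uses HA16 rather than HI16: addi sign-extends its
+// 16-bit immediate, so HA16 adds one to the high halfword whenever the low
+// halfword has its sign bit set, making the two additions sum to the full value.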
+// OR an arbitrary immediate.
+def : Pat<(or i32:$in, imm:$imm),
+ (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor i32:$in, imm:$imm),
+ (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+def : Pat<(sub imm32SExt16:$imm, i32:$in),
+ (SUBFIC $in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
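+// For example, a left shift by 4 becomes "rlwinm rA, rS, 4, 0, 27" (rotate left
+// by 4, clear the 4 low-order bits) and a right shift by 4 becomes
+// "rlwinm rA, rS, 28, 4, 31" (rotate left by 28, clear the 4 high-order bits).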
+
+// ROTL
+def : Pat<(rotl i32:$in, i32:$sh),
+ (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+ (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
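+// (maskimm32 matches only masks that form a single contiguous run of ones, so
+// MB and ME simply name the first and last set bit of that run.)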
+
+// Calls
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+ (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+ (BL texternalsym:$dst)>;
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+ (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+ (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS $in, tblockaddress:$g)>;
+
+// Support for thread-local storage.
+def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+ [(set i32:$rD, (PPCppc32GOT))]>;
+
+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
+// This uses two output registers: the first is the real output and the second
+// is a temporary register used internally during code generation.
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+ []>, NoEncode<"$rT">;
+
+def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
+ (ADD4TLS $in, tglobaltlsaddr:$g)>;
+
+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsgdL32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsgdLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsldL32",
+ [(set i32:$rD,
+ (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsldADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsldAddr i32:$reg,
+ tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsldL32 and GETtlsldADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsldLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsldLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDIdtprelL32",
+ [(set i32:$rD,
+ (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDISdtprelHA32",
+ [(set i32:$rD,
+ (PPCaddisDtprelHA i32:$reg,
+ tglobaltlsaddr:$disp))]>;
+
+// Support for Position-independent code
+def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+ "#LWZtoc",
+ [(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+// Get Global (GOT) Base Register offset, from the word immediately preceding
+// the function label.
+def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
+
+// Only seq_cst fences require the heavyweight sync (SYNC 0).
+// All others can use the lightweight sync (SYNC 1).
+// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// The rule for seq_cst is duplicated so that it works with both the 64-bit and
+// 32-bit versions of Power.
+def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
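+// (The constant 7 above is the SequentiallyConsistent ordering value carried by
+// the atomic_fence node; every weaker ordering falls through to the lightweight form.)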
+
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign f64:$frB, f32:$frA),
+ (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>;
+def : Pat<(fcopysign f32:$frB, f64:$frA),
+ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstrSPE.td"
+include "PPCInstr64Bit.td"
+include "PPCInstrVSX.td"
+include "PPCInstrQPX.td"
+include "PPCInstrHTM.td"
+
+def crnot : OutPatFrag<(ops node:$in),
+ (CRNOR $in, $in)>;
+def : Pat<(not i1:$in),
+ (crnot $in)>;
+
+// Patterns for arithmetic i1 operations.
+def : Pat<(add i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(sub i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(mul i1:$a, i1:$b),
+ (CRAND $a, $b)>;
+
+// We're sometimes asked to materialize i1 -1, which is just 1 in this case
+// (-1 is used to mean all bits set).
+def : Pat<(i1 -1), (CRSET)>;
+
+// i1 extensions, implemented in terms of isel.
+def : Pat<(i32 (zext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i32 (sext i1:$in)),
+ (SELECT_I4 $in, (LI -1), (LI 0))>;
+
+def : Pat<(i64 (zext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+def : Pat<(i64 (sext i1:$in)),
+ (SELECT_I8 $in, (LI8 -1), (LI8 0))>;
+
+// FIXME: We should choose either a zext or a sext based on other constants
+// already around.
+def : Pat<(i32 (anyext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i64 (anyext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+
+// match setcc on i1 variables.
+// CRANDC is:
+// 1 1 : F
+// 1 0 : T
+// 0 1 : F
+// 0 0 : F
+//
+// LT is:
+// -1 -1 : F
+// -1 0 : T
+// 0 -1 : F
+// 0 0 : F
+//
+// ULT is:
+// 1 1 : F
+// 1 0 : F
+// 0 1 : T
+// 0 0 : F
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)),
+ (CRANDC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),
+ (CRANDC $s2, $s1)>;
+// CRORC is:
+// 1 1 : T
+// 1 0 : T
+// 0 1 : F
+// 0 0 : T
+//
+// LE is:
+// -1 -1 : T
+// -1 0 : T
+// 0 -1 : F
+// 0 0 : T
+//
+// ULE is:
+// 1 1 : T
+// 1 0 : F
+// 0 1 : T
+// 0 0 : T
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)),
+ (CRORC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),
+ (CRORC $s2, $s1)>;
+
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),
+ (CREQV $s1, $s2)>;
+
+// GE is:
+// -1 -1 : T
+// -1 0 : F
+// 0 -1 : T
+// 0 0 : T
+//
+// UGE is:
+// 1 1 : T
+// 1 0 : T
+// 0 1 : F
+// 0 0 : T
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)),
+ (CRORC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),
+ (CRORC $s1, $s2)>;
+
+// GT is:
+// -1 -1 : F
+// -1 0 : F
+// 0 -1 : T
+// 0 0 : F
+//
+// UGT is:
+// 1 1 : F
+// 1 0 : T
+// 0 1 : F
+// 0 0 : F
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)),
+ (CRANDC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),
+ (CRANDC $s1, $s2)>;
+
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),
+ (CRXOR $s1, $s2)>;
+
+// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE,
+// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for
+// floating-point types.
+
+multiclass CRNotPat<dag pattern, dag result> {
+ def : Pat<pattern, (crnot result)>;
+ def : Pat<(not pattern), result>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i32 (zext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+ def : Pat<(i32 (sext pattern)),
+ (SELECT_I4 result, (LI 0), (LI -1))>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i64 (zext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+ def : Pat<(i64 (sext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 -1))>;
+
+ // FIXME: We should choose either a zext or a sext based on other constants
+ // already around.
+ def : Pat<(i32 (anyext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+
+ def : Pat<(i64 (anyext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+}
+
+// FIXME: Because of what seems like a bug in TableGen's type-inference code,
+// we need to write imm:$imm in the output patterns below, not just $imm, or
+// else the resulting matcher will not correctly add the immediate operand
+// (making it a register operand instead).
+
+// extended SETCC.
+multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag,
+ OutPatFrag rfrag, OutPatFrag rfrag8> {
+ def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+
+ def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+}
+
+// Note that we do all inversions below with i(32|64)not, instead of using
+// (xori x, 1) because on the A2 nor has single-cycle latency while xori
+// has 2-cycle latency.
+
+defm : ExtSetCCPat<SETEQ,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (CNTLZW $in), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (CNTLZD $in), 58, 63)> >;
+
+defm : ExtSetCCPat<SETNE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not (CNTLZD $in)), 58, 63)> >;
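+// The two expansions above extract bit 5 (bit 6 for the 64-bit form) of the
+// leading-zero count: cntlzw/cntlzd return 32/64 only for a zero input, so that
+// bit of the count is (x == 0), and the inverted form yields (x != 0).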
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >;
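+// The gt-with-zero expansion above tests the sign bit of ((-x) & ~x): -x has
+// its sign bit set for any positive x, while ~x has its sign bit set only when
+// x is non-negative, so their AND has the sign bit set exactly when x > 0.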
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+// SETCC for i32.
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+// For non-equality comparisons, the default code would materialize the
+// constant, then compare against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpw cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmplwi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+// SETCC for i64.
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+// For non-equality comparisons, the default code would materialize the
+// constant, then compare against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpd cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmpldi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+// SETCC for f32.
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+// SETCC for f64.
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+// match select on i1 variables:
+def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)),
+ (CROR (CRAND $cond , $tval),
+ (CRAND (crnot $cond), $fval))>;
+
+// match selectcc on i1 variables:
+// select (lhs == rhs), tval, fval is:
+// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval)
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)),
+ (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
+ (CRAND (CRORC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)),
+ (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
+ (CRAND (CRORC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)),
+ (CROR (CRAND (CRORC $lhs, $rhs), $tval),
+ (CRAND (CRANDC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)),
+ (CROR (CRAND (CRORC $rhs, $lhs), $tval),
+ (CRAND (CRANDC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $tval),
+ (CRAND (CRXOR $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)),
+ (CROR (CRAND (CRORC $rhs, $lhs), $tval),
+ (CRAND (CRANDC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)),
+ (CROR (CRAND (CRORC $lhs, $rhs), $tval),
+ (CRAND (CRANDC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)),
+ (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
+ (CRAND (CRORC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)),
+ (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
+ (CRAND (CRORC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $fval),
+ (CRAND (CRXOR $lhs, $rhs), $tval))>;
+
+// match selectcc on i1 variables with non-i1 output.
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)),
+ (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)),
+ (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)),
+ (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)),
+ (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)),
+ (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)),
+ (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)),
+ (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)),
+ (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)),
+ (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)),
+ (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)),
+ (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)),
+ (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)),
+ (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)),
+ (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)),
+ (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)),
+ (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)),
+ (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)),
+ (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)),
+ (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)),
+ (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
+ (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
+ (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
+ (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
+ (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
+ (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
+ (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
+ (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
+ (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)),
+ (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)),
+ (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)),
+ (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)),
+ (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)),
+ (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)),
+ (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)),
+ (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)),
+ (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)),
+ (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
+ (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+let usesCustomInserter = 1 in {
+def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_EQ_BIT",
+ [(set i1:$dst, (trunc (not i32:$in)))]>;
+def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_GT_BIT",
+ [(set i1:$dst, (trunc i32:$in))]>;
+
+def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_EQ_BIT8",
+ [(set i1:$dst, (trunc (not i64:$in)))]>;
+def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_GT_BIT8",
+ [(set i1:$dst, (trunc i64:$in))]>;
+}
+
+def : Pat<(i1 (not (trunc i32:$in))),
+ (ANDIo_1_EQ_BIT $in)>;
+def : Pat<(i1 (not (trunc i64:$in))),
+ (ANDIo_1_EQ_BIT8 $in)>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instructions used for assembler/disassembler only
+//
+
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def LSWI : XForm_base_r3xo<31, 597, (outs gprc:$RT), (ins gprc:$A, u5imm:$B),
+ "lswi $RT, $A, $B", IIC_LdStLoad, []>;
+
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def STSWI : XForm_base_r3xo<31, 725, (outs), (ins gprc:$RT, gprc:$A, u5imm:$B),
+ "stswi $RT, $A, $B", IIC_LdStLoad, []>;
+
+def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
+ "isync", IIC_SprISYNC, []>;
+
+def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
+ "icbi $src", IIC_LdStICBI, []>;
+
+// We used to name this EIEIO, but identifiers matching E[0-9A-Z]... are reserved
+// (errno macro names), so the value is named EnforceIEIO instead.
+def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
+ "eieio", IIC_LdStLoad, []>;
+
+def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
+ "wait $L", IIC_LdStLoad, []>;
+
+def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO),
+ "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR),
+ "mtsr $SR, $RS", IIC_SprMTSR>;
+
+def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR),
+ "mfsr $RS, $SR", IIC_SprMFSR>;
+
+def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB),
+ "mtsrin $RS, $RB", IIC_SprMTSR>;
+
+def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB),
+ "mfsrin $RS, $RB", IIC_SprMFSR>;
+
+def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsr $RS, $L", IIC_SprMTMSR>;
+
+def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS),
+ "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> {
+ let L = 0;
+}
+
+def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>,
+ Requires<[IsBookE]> {
+ bits<1> E;
+
+ let Inst{16} = E;
+ let Inst{21-30} = 163;
+}
+
+def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B),
+ "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B),
+ "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+
+def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
+ "mfmsr $RT", IIC_SprMFMSR, []>;
+
+def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsrd $RS, $L", IIC_SprMTMSRD>;
+
+def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
+ "mcrfs $BF, $BFA", IIC_BrMCR>;
+
+def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi $BF, $U, $W", IIC_IntMFFS>;
+
+def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W),
+ "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT;
+
+def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>;
+def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>;
+
+def MTFSF : XFLForm_1<63, 711, (outs),
+ (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
+def MTFSFo : XFLForm_1<63, 711, (outs),
+ (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
+ "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT;
+
+def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>;
+
+def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
+ "slbie $RB", IIC_SprSLBIE, []>;
+
+def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
+ "slbmte $RS, $RB", IIC_SprSLBMTE, []>;
+
+def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
+ "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>;
+
+def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
+
+def TLBIA : XForm_0<31, 370, (outs), (ins),
+ "tlbia", IIC_SprTLBIA, []>;
+
+def TLBSYNC : XForm_0<31, 566, (outs), (ins),
+ "tlbsync", IIC_SprTLBSYNC, []>;
+
+def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
+ "tlbiel $RB", IIC_SprTLBIEL, []>;
+
+def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB),
+ "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
+ "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+
+def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
+ "tlbie $RB,$RS", IIC_SprTLBIE, []>;
+
+def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBRE : XForm_24_eieio<31, 946, (outs), (ins),
+ "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBWE : XForm_24_eieio<31, 978, (outs), (ins),
+ "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS),
+ "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS),
+ "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>;
+def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
+ (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>, isDOT;
+
+def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
+
+def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>,
+ Requires<[IsBookE]>;
+def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>,
+ Requires<[IsBookE]>;
+
+def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>,
+ Requires<[IsE500]>;
+def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>,
+ Requires<[IsE500]>;
+
+def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>;
+def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR),
+ "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
+
+def ATTN : XForm_attn<0, 256, (outs), (ins), "attn", IIC_BrB>;
+
+def LBZCIX : XForm_base_r3xo<31, 853, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "lbzcix $RST, $A, $B", IIC_LdStLoad, []>;
+def LHZCIX : XForm_base_r3xo<31, 821, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "lhzcix $RST, $A, $B", IIC_LdStLoad, []>;
+def LWZCIX : XForm_base_r3xo<31, 789, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "lwzcix $RST, $A, $B", IIC_LdStLoad, []>;
+def LDCIX : XForm_base_r3xo<31, 885, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
+ "ldcix $RST, $A, $B", IIC_LdStLoad, []>;
+
+def STBCIX : XForm_base_r3xo<31, 981, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "stbcix $RST, $A, $B", IIC_LdStLoad, []>;
+def STHCIX : XForm_base_r3xo<31, 949, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "sthcix $RST, $A, $B", IIC_LdStLoad, []>;
+def STWCIX : XForm_base_r3xo<31, 917, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "stwcix $RST, $A, $B", IIC_LdStLoad, []>;
+def STDCIX : XForm_base_r3xo<31, 1013, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "stdcix $RST, $A, $B", IIC_LdStLoad, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Assembler Instruction Aliases
+//
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class PPCAsmPseudo<string asm, dag iops>
+ : Instruction {
+ let Namespace = "PPC";
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let AsmString = asm;
+ let isAsmParserOnly = 1;
+ let isPseudo = 1;
+}
+
+def : InstAlias<"sc", (SC 0)>;
+
+def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
+
+def : InstAlias<"wait", (WAIT 0)>;
+def : InstAlias<"waitrsv", (WAIT 1)>;
+def : InstAlias<"waitimpl", (WAIT 2)>;
+
+def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>;
+
+def DCBTx : PPCAsmPseudo<"dcbt $dst", (ins memrr:$dst)>;
+def DCBTSTx : PPCAsmPseudo<"dcbtst $dst", (ins memrr:$dst)>;
+
+def DCBTCT : PPCAsmPseudo<"dcbtct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTDS : PPCAsmPseudo<"dcbtds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTT : PPCAsmPseudo<"dcbtt $dst", (ins memrr:$dst)>;
+
+def DCBTSTCT : PPCAsmPseudo<"dcbtstct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTSTDS : PPCAsmPseudo<"dcbtstds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
+def DCBTSTT : PPCAsmPseudo<"dcbtstt $dst", (ins memrr:$dst)>;
+
+def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
+def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
+def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+
+def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
+def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+
+def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
+def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
+
+def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>;
+def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>;
+
+def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>;
+def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>;
+
+def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>;
+def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>;
+
+def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>;
+def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>;
+
+def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>;
+def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>;
+
+def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>;
+def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>;
+
+def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>;
+def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>;
+
+def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>;
+def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>;
+
+def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>;
+def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
+
+def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
+def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
+
+def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+
+def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
+
+def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"xnop", (XORI R0, R0, 0)>;
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+
+foreach BATR = 0-3 in {
+ def : InstAlias<"mtdbatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtdbatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>,
+ Requires<[IsPPC6xx]>;
+}
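+// The SPR arithmetic above is 2*BATR + base: DBAT0U/DBAT0L live at SPRs
+// 536/537 and IBAT0U/IBAT0L at 528/529, with each subsequent BAT pair two
+// SPRs higher, so e.g. "mtdbatu 2, $Rx" assembles as an MTSPR to SPR 540
+// (DBAT2U).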
+
+foreach BR = 0-7 in {
+ def : InstAlias<"mfbr"#BR#" $Rx",
+ (MFDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+ def : InstAlias<"mtbr"#BR#" $Rx",
+ (MTDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+}
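+// Here !add(BR, 0x80) selects DCR numbers 0x80-0x87, so "mfbr3 $Rx" becomes
+// "mfdcr $Rx, 0x83" and "mtbr3 $Rx" becomes "mtdcr 0x83, $Rx".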
+
+def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
+
+def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>;
+
+def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
+
+def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
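+// These pseudos are rewritten in the asm parser's C++ (presumably in
+// PPCAsmParser.cpp) using the standard extended-mnemonic identities:
+// "subi rA, rB, N" is "addi rA, rB, -N", and likewise subis/subic/subic.
+// map to addis/addic/addic. with the immediate negated.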
+
+def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+
+def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
+def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
+
+def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
+
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+}
+foreach SPRG = 4-7 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+}
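+// Example of the arithmetic above: "mfsprg2 $RT" reads SPR 274 (272 + 2),
+// while the BookE-only SPRG4-SPRG7 are accessed through SPRs 260-263
+// (256 + n).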
+
+def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
+
+def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
+def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
+
+def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
+
+def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
+def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
+
+def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
+def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
+def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
+def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
+
+def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+
+def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+
+def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+
+def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+
+def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+// The POWER variant
+def : MnemonicAlias<"cntlz", "cntlzw">;
+def : MnemonicAlias<"cntlz.", "cntlzw.">;
+
+def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
+def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
+
+def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+
+def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+
+// These generic branch instruction forms are used for the assembler parser only.
+// Defs and Uses are conservative, since we don't know the BO value.
+let PPC970_Unit = 7 in {
+ let Defs = [CTR], Uses = [CTR, RM] in {
+ def gBC : BForm_3<16, 0, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
+ "bc $bo, $bi, $dst">;
+ def gBCA : BForm_3<16, 1, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
+ "bca $bo, $bi, $dst">;
+ }
+ let Defs = [LR, CTR], Uses = [CTR, RM] in {
+ def gBCL : BForm_3<16, 0, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
+ "bcl $bo, $bi, $dst">;
+ def gBCLA : BForm_3<16, 1, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
+ "bcla $bo, $bi, $dst">;
+ }
+ let Defs = [CTR], Uses = [CTR, LR, RM] in
+ def gBCLR : XLForm_2<19, 16, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bclr $bo, $bi, $bh", IIC_BrB, []>;
+ let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
+ def gBCLRL : XLForm_2<19, 16, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bclrl $bo, $bi, $bh", IIC_BrB, []>;
+ let Defs = [CTR], Uses = [CTR, LR, RM] in
+ def gBCCTR : XLForm_2<19, 528, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bcctr $bo, $bi, $bh", IIC_BrB, []>;
+ let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
+ def gBCCTRL : XLForm_2<19, 528, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bcctrl $bo, $bi, $bh", IIC_BrB, []>;
+}
+def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>;
+
+multiclass BranchSimpleMnemonic1<string name, string pm, int bo> {
+ def : InstAlias<"b"#name#pm#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"a"#pm#" $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"lr"#pm#" $bi", (gBCLR bo, crbitrc:$bi, 0)>;
+ def : InstAlias<"b"#name#"l"#pm#" $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"la"#pm#" $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"lrl"#pm#" $bi", (gBCLRL bo, crbitrc:$bi, 0)>;
+}
+multiclass BranchSimpleMnemonic2<string name, string pm, int bo>
+ : BranchSimpleMnemonic1<name, pm, bo> {
+ def : InstAlias<"b"#name#"ctr"#pm#" $bi", (gBCCTR bo, crbitrc:$bi, 0)>;
+ def : InstAlias<"b"#name#"ctrl"#pm#" $bi", (gBCCTRL bo, crbitrc:$bi, 0)>;
+}
+defm : BranchSimpleMnemonic2<"t", "", 12>;
+defm : BranchSimpleMnemonic2<"f", "", 4>;
+defm : BranchSimpleMnemonic2<"t", "-", 14>;
+defm : BranchSimpleMnemonic2<"f", "-", 6>;
+defm : BranchSimpleMnemonic2<"t", "+", 15>;
+defm : BranchSimpleMnemonic2<"f", "+", 7>;
+defm : BranchSimpleMnemonic1<"dnzt", "", 8>;
+defm : BranchSimpleMnemonic1<"dnzf", "", 0>;
+defm : BranchSimpleMnemonic1<"dzt", "", 10>;
+defm : BranchSimpleMnemonic1<"dzf", "", 2>;
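+// The BO values follow the architected branch encoding: 12/4 branch if the
+// CR bit is true/false, 14/15 and 6/7 add the "-"/"+" prediction hints, and
+// 8/0 (10/2) are the decrement-CTR-and-branch-if-nonzero (-if-zero) forms.
+// So "bt 2, $dst" assembles as "bc 12, 2, $dst" and "bdnzf 2, $dst" as
+// "bc 0, 2, $dst".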
+
+multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
+ def : InstAlias<"b"#name#pm#" $cc, $dst",
+ (BCC bibo, crrc:$cc, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#pm#" $dst",
+ (BCC bibo, CR0, condbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"a"#pm#" $cc, $dst",
+ (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"a"#pm#" $dst",
+ (BCCA bibo, CR0, abscondbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"lr"#pm#" $cc",
+ (BCCLR bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"lr"#pm,
+ (BCCLR bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"ctr"#pm#" $cc",
+ (BCCCTR bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"ctr"#pm,
+ (BCCCTR bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"l"#pm#" $cc, $dst",
+ (BCCL bibo, crrc:$cc, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"l"#pm#" $dst",
+ (BCCL bibo, CR0, condbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"la"#pm#" $cc, $dst",
+ (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"la"#pm#" $dst",
+ (BCCLA bibo, CR0, abscondbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"lrl"#pm#" $cc",
+ (BCCLRL bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"lrl"#pm,
+ (BCCLRL bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"ctrl"#pm#" $cc",
+ (BCCCTRL bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"ctrl"#pm,
+ (BCCCTRL bibo, CR0)>;
+}
+multiclass BranchExtendedMnemonic<string name, int bibo> {
+ defm : BranchExtendedMnemonicPM<name, "", bibo>;
+ defm : BranchExtendedMnemonicPM<name, "-", !add(bibo, 2)>;
+ defm : BranchExtendedMnemonicPM<name, "+", !add(bibo, 3)>;
+}
+defm : BranchExtendedMnemonic<"lt", 12>;
+defm : BranchExtendedMnemonic<"gt", 44>;
+defm : BranchExtendedMnemonic<"eq", 76>;
+defm : BranchExtendedMnemonic<"un", 108>;
+defm : BranchExtendedMnemonic<"so", 108>;
+defm : BranchExtendedMnemonic<"ge", 4>;
+defm : BranchExtendedMnemonic<"nl", 4>;
+defm : BranchExtendedMnemonic<"le", 36>;
+defm : BranchExtendedMnemonic<"ng", 36>;
+defm : BranchExtendedMnemonic<"ne", 68>;
+defm : BranchExtendedMnemonic<"nu", 100>;
+defm : BranchExtendedMnemonic<"ns", 100>;
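+// Each bibo constant packs a predicate in the style of PPCPredicates.h: the
+// condition-bit index within the CR field in the upper bits and BO in the
+// low five, e.g. lt = (0 << 5) | 12, gt = (1 << 5) | 12 = 44,
+// eq = (2 << 5) | 12 = 76, while "ge"/"ne"/"nu" use BO = 4 (branch if false)
+// on the same bits.  The !add of 2 or 3 sets the BO prediction-hint bits for
+// the "-" and "+" suffixes.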
+
+def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>;
+def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>;
+def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>;
+
+def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>;
+def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>;
+def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+
+multiclass TrapExtendedMnemonic<string name, int to> {
+ def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>;
+ def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>;
+ def : InstAlias<"tw"#name#"i $rA, $imm", (TWI to, gprc:$rA, s16imm:$imm)>;
+ def : InstAlias<"tw"#name#" $rA, $rB", (TW to, gprc:$rA, gprc:$rB)>;
+}
+defm : TrapExtendedMnemonic<"lt", 16>;
+defm : TrapExtendedMnemonic<"le", 20>;
+defm : TrapExtendedMnemonic<"eq", 4>;
+defm : TrapExtendedMnemonic<"ge", 12>;
+defm : TrapExtendedMnemonic<"gt", 8>;
+defm : TrapExtendedMnemonic<"nl", 12>;
+defm : TrapExtendedMnemonic<"ne", 24>;
+defm : TrapExtendedMnemonic<"ng", 20>;
+defm : TrapExtendedMnemonic<"llt", 2>;
+defm : TrapExtendedMnemonic<"lle", 6>;
+defm : TrapExtendedMnemonic<"lge", 5>;
+defm : TrapExtendedMnemonic<"lgt", 1>;
+defm : TrapExtendedMnemonic<"lnl", 5>;
+defm : TrapExtendedMnemonic<"lng", 6>;
+defm : TrapExtendedMnemonic<"u", 31>;
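+// The TO operand is the architected 5-bit trap-condition mask
+// {lt = 16, gt = 8, eq = 4, llt = 2, lgt = 1}: "twlle" uses TO = 6 (llt|eq),
+// "twge" uses TO = 12 (gt|eq), and "twu"/"tdu" trap unconditionally with
+// TO = 31.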
+
+// Atomic loads
+def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>;
+def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>;
+def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>;
+def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>;
+def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>;
+def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>;
+
+// Atomic stores
+def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;
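+// Plain loads/stores suffice here because aligned accesses up to word size
+// are single-copy atomic on PowerPC; any ordering stronger than monotonic is
+// presumably enforced by separately emitted sync/lwsync fences rather than
+// by these patterns.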
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
new file mode 100644
index 0000000..4312007
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -0,0 +1,1216 @@
+//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the QPX extension to the PowerPC instruction set.
+// Reference:
+// Book Q: QPX Architecture Definition. IBM (as updated in) 2011.
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegQFRCAsmOperand : AsmOperandClass {
+ let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
+}
+def qfrc : RegisterOperand<QFRC> {
+ let ParserMatchClass = PPCRegQFRCAsmOperand;
+}
+def PPCRegQSRCAsmOperand : AsmOperandClass {
+ let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
+}
+def qsrc : RegisterOperand<QSRC> {
+ let ParserMatchClass = PPCRegQSRCAsmOperand;
+}
+def PPCRegQBRCAsmOperand : AsmOperandClass {
+ let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
+}
+def qbrc : RegisterOperand<QBRC> {
+ let ParserMatchClass = PPCRegQBRCAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// QPXA1_Int - An AForm_1 intrinsic definition.
+class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA1s_Int - An AForm_1 intrinsic definition (simple instructions).
+class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA2_Int - An AForm_2 intrinsic definition.
+class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXA3_Int - An AForm_3 intrinsic definition.
+class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
+// QPXA4_Int - An AForm_4a intrinsic definition.
+class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+// QPXX18_Int - An XForm_18 intrinsic definition.
+class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXX19_Int - An XForm_19 intrinsic definition.
+class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Frags.
+
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncst node:$val,
+ node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
+}]>;
+
+def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
+}]>;
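+// Operand 1 of the FP_ROUND node is its "truncation is exact" flag, so
+// fround_inexact matches only rounds that may lose precision and
+// fround_exact only those known to be value-preserving.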
+
+let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
+ def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasQPX : Predicate<"PPCSubTarget->hasQPX()">;
+let Predicates = [HasQPX] in {
+let DecoderNamespace = "QPX" in {
+let hasSideEffects = 0 in { // QPX instructions don't have side effects.
+let Uses = [RM] in {
+ // Add Instructions
+ let isCommutable = 1 in {
+ def QVFADD : AForm_2<4, 21,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
+ def QVFADDSs : AForm_2<0, 21,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
+ }
+ def QVFSUB : AForm_2<4, 20,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
+ def QVFSUBSs : AForm_2<0, 20,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
+
+ // Estimate Instructions
+ def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfre $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
+ def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
+ let isCodeGenOnly = 1 in
+ def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfres $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
+
+ def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
+ def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
+ let isCodeGenOnly = 1 in
+ def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
+
+ // Multiply Instructions
+ let isCommutable = 1 in {
+ def QVFMUL : AForm_3<4, 25,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
+ def QVFMULSs : AForm_3<0, 25,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
+ "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
+ }
+ def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
+ def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
+
+ // Multiply-add instructions
+ def QVFMADD : AForm_1<4, 29,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
+ def QVFMADDSs : AForm_1<0, 29,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
+ def QVFNMADD : AForm_1<4, 31,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
+ def QVFNMADDSs : AForm_1<0, 31,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ v4f32:$FRB)))]>;
+ def QVFMSUB : AForm_1<4, 28,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
+ def QVFMSUBSs : AForm_1<0, 28,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB)))]>;
+ def QVFNMSUB : AForm_1<4, 30,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB))))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
+ def QVFNMSUBSs : AForm_1<0, 30,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB))))]>;
+ def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>;
+ def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>;
+ def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
+ def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
+ def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
+ def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
+ def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
+ def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
+
+ // Select Instruction
+ let isCodeGenOnly = 1 in
+ def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
+ def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
+ (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (vselect v4i1:$FRA,
+ v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
+ (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (vselect v4i1:$FRA,
+ v4f32:$FRC, v4f32:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
+ (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4i1:$FRT, (vselect v4i1:$FRA,
+ v4i1:$FRC, v4i1:$FRB))]>;
+
+ // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+ // instruction selection into a branch sequence.
+ let usesCustomInserter = 1 in {
+ def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
+ }
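+ // Sketch of the custom insertion (assumption: handled with the other
+ // SELECT_CC/SELECT pseudos in PPCISelLowering's EmitInstrWithCustomInserter):
+ // the block is split into a small diamond, a conditional branch on the CR
+ // condition skips the false path, and a PHI at the join point picks $T or $F.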
+
+ // Convert and Round Instructions
+ def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
+ let isCodeGenOnly = 1 in
+ def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
+ def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
+ def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
+ def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
+ def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
+ def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
+ def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
+ def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
+ let isCodeGenOnly = 1 in
+ def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
+ def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
+ def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
+
+ let isCodeGenOnly = 1 in
+ def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
+ def QVFRSPs : XForm_19<4, 12,
+ (outs qsrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsp $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
+
+ def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
+
+ def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (frnd v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (frnd v4f32:$FRB))]>;
+
+ def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
+
+ def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
+
+ // Move Instructions
+ def QVFMR : XForm_19<4, 72,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
+ let isCodeGenOnly = 1 in {
+ def QVFMRs : XForm_19<4, 72,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
+ def QVFMRb : XForm_19<4, 72,
+ (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
+ }
+ def QVFNEG : XForm_19<4, 40,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNEGs : XForm_19<4, 40,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
+ def QVFABS : XForm_19<4, 264,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFABSs : XForm_19<4, 264,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
+ def QVFNABS : XForm_19<4, 136,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNABSs : XForm_19<4, 136,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
+ def QVFCPSGN : XForm_18<4, 8,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCPSGNs : XForm_18<4, 8,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
+
+ def QVALIGNI : Z23Form_1<4, 5,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIs : Z23Form_1<4, 5,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIb : Z23Form_1<4, 5,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
+ (i32 imm:$idx)))]>;
+
+ def QVESPLATI : Z23Form_2<4, 37,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIs : Z23Form_2<4, 37,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIb : Z23Form_2<4, 37,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
+
+ def QVFPERM : AForm_1<4, 6,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFPERMs : AForm_1<4, 6,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
+
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+ def QVGPCI : Z23Form_3<4, 133,
+ (outs qfrc:$FRT), (ins u12imm:$idx),
+ "qvgpci $FRT, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
+
+ // Compare Instruction
+ let isCodeGenOnly = 1 in
+ def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
+ def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
+ def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
+ def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
+ def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVFLOGICAL : XForm_20<4, 4,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ def QVFLOGICALb : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ let isCodeGenOnly = 1 in
+ def QVFLOGICALs : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+
+ // Load indexed instructions
+ let mayLoad = 1 in {
+ def QVLFDX : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (load xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFDXb : XForm_1<31, 583,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFDXA : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFDUX : XForm_1<31, 615,
+ (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+ let RC = 1 in
+ def QVLFDUXA : XForm_1<31, 615,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSX : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVLFSXb : XForm_1<31, 519,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFSXs : XForm_1<31, 519,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f32:$FRT, (load xoaddr:$src))]>;
+
+ let RC = 1 in
+ def QVLFSXA : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSUX : XForm_1<31, 551,
+ (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+ let RC = 1 in
+ def QVLFSUXA : XForm_1<31, 551,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDX : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDXA : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDUX : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDUXA : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSX : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLFCSXs : XForm_1<31, 7,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFCSXA : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSUX : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCSUXA : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWAX : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWAXA : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWZX : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWZXA : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
+ }
+
+
+ def QVLPCLDX : XForm_1<31, 582,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCLSX : XForm_1<31, 518,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLPCLSXint : XForm_11<31, 518,
+ (outs qfrc:$FRT), (ins G8RC:$src),
+ "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
+ def QVLPCRDX : XForm_1<31, 70,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCRSX : XForm_1<31, 6,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def QVSTFDX : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD,
+ [(store qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFDXb : XForm_8<31, 711,
+ (outs), (ins qbrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFDXA : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFDUXA : XForm_8<31, 743,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDXI : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDXIA : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUXI : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDUXIA : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSX : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFSXs : XForm_8<31, 647,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(store qsrc:$FRT, xoaddr:$dst)]>;
+
+ let RC = 1 in
+ def QVSTFSXA : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+ let isCodeGenOnly = 1 in
+ def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFSUXA : XForm_8<31, 679,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSXI : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSXIA : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUXI : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSUXIA : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDX : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXA : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSX : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVSTFCSXs : XForm_8<31, 135,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFCSXA : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUX : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXA : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUX : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXA : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDXI : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXIA : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSXI : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSXIA : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUXI : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXIA : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUXI : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXIA : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFIWX : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFIWXA : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
+ }
+}
+
+} // neverHasSideEffects
+}
+
+def : InstAlias<"qvfclr $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
+def : InstAlias<"qvfand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
+def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
+def : InstAlias<"qvfctfb $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
+def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
+def : InstAlias<"qvfor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
+def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
+def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
+def : InstAlias<"qvfnot $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
+def : InstAlias<"qvforc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
+def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
+def : InstAlias<"qvfset $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
+
+//===----------------------------------------------------------------------===//
+// Additional QPX Patterns
+//
+
+def : Pat<(v4f64 (scalar_to_vector f64:$A)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
+
+def : Pat<(f64 (extractelt v4f64:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+
+def : Pat<(f64 (extractelt v4f64:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
+def : Pat<(f64 (extractelt v4f64:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
+def : Pat<(f64 (extractelt v4f64:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
+
+def : Pat<(f32 (extractelt v4f32:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
+
+def : Pat<(f64 (extractelt v4f64:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERM $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
+def : Pat<(f32 (extractelt v4f32:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERMs $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
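+// Note on the variable-index extracts above: (RLDICR $F, 2, 61) is $F << 2,
+// i.e. the element index scaled by 4. The codegen-only QVLPCLSXint (qvlpclsx
+// with a GPR operand, defined earlier) turns that scaled offset into a
+// permute control, and QVFPERM then rotates the selected element into slot 0
+// so it can be read with EXTRACT_SUBREG sub_64, just like the constant
+// index-0 patterns. The factor of 4 is presumably tied to qvlpclsx's
+// word-granular addressing; that reading is inferred here, not taken from
+// the ISA text.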
+
+def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFPERM $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
+ (QVFCPSGN $A, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
+ (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
+def : Pat<(fcopysign QSRC:$frB, QFRC:$frA),
+ (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
+
+def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
+def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
+def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
+
+def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
+def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
+def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
+def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
+
+def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
+def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
+
+def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
+ (QVFADD $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
+ (QVFSUB $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
+ (QVFMUL $A, $B)>;
+
+// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
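+// Worked out: fma(-a, c, b) = -a*c + b = -(a*c - b), which is what qvfnmsub
+// computes, so both fneg placements above fold into a single QVFNMSUB. Note
+// the operand order implied by these patterns: (QVFNMSUB $A, $B, $C) yields
+// -($A * $C - $B), the same mapping used by the int_ppc_qpx_qvfnmsub pattern
+// below.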
+
+def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMSUB $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMSUB $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
+ (QVLFDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
+ (QVLFSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
+ (QVLFCDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
+ (QVLFCDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
+ (QVLFCSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
+ (QVLFCSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
+ (QVLFIWAXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
+ (QVLFIWAX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
+ (QVLFIWZXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
+ (QVLFIWZX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
+ (QVLPCLDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
+ (QVLPCLSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
+ (QVLPCRDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
+ (QVLPCRSX xoaddr:$src)>;
+
+def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
+ (QVSTFDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
+ (QVSTFSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
+ (QVSTFCDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
+ (QVSTFCDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
+ (QVSTFCSXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
+ (QVSTFCSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
+ (QVSTFDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
+ (QVSTFIWXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
+ (QVSTFIWX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
+ (QVSTFSXA $T, xoaddr:$dst)>;
+
+def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFDUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
+
+def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)),
+ (QVFLOGICAL $A, $B, imm:$idx)>;
+def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
+ (QVGPCI imm:$idx)>;
+
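+// The QVFLOGICAL immediates below are the same truth-table opcodes as in the
+// aliases above (1 = and, 4 = andc, 6 = xor, 7 = or, 8 = nor, 9 = equ (eqv),
+// 10 = not, 13 = orc, 14 = nand). For example, SETOGE (ordered and not
+// less-than) is nor(cmplt, tstnan), hence opcode 8; SETUEQ (equal or
+// unordered) is or(cmpeq, tstnan), hence opcode 7; and SETO is not(tstnan),
+// hence opcode 10.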
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
+ (QVFCMPEQb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
+ (QVFCMPGTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
+ (QVFCMPLTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 10))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
+
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
+ (QVFCMPEQbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
+ (QVFCMPGTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
+ (QVFCMPLTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
+
+def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 4))>;
+def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 8))>;
+def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 9))>;
+def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 13))>;
+def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 14))>;
+
+def : Pat<(and v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 1))>;
+def : Pat<(or v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 7))>;
+def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 6))>;
+def : Pat<(not v4i1:$FRA),
+ (QVFLOGICALb $FRA, $FRA, (i32 10))>;
+
+def : Pat<(v4f64 (fextend v4f32:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f32 (fround_exact v4f64:$src)),
+ (COPY_TO_REGCLASS $src, QSRC)>;
+
+// Extract the underlying floating-point values from the
+// QPX (-1.0, 1.0) boolean representation.
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+ (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)),
+ (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+ (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)),
+ (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
+ (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+ (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)),
+ (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+ (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)),
+ (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
+ (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+ (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)),
+ (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+ (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)),
+ (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
+ (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+ (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)),
+ (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+ (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)),
+ (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
+ (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+ (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)),
+ (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+ (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)),
+ (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
+ (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+ (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)),
+ (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+ (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)),
+ (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
+ (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+} // end HasQPX
+
+let Predicates = [HasQPX, NoNaNsFPMath] in {
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
+}
+
+let Predicates = [HasQPX, NaNsFPMath] in {
+// When either of these operands is NaN, we should return the other operand.
+// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we need
+// to explicitly OR with a NaN test on the second operand.
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+}
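+// In the patterns above, QVFLOGICAL opcode 7 is the "or" operation (see the
+// qvfor alias earlier), so the select condition becomes
+// "cmp($FRA, $FRB) or isnan($FRB)". If $FRB is NaN the condition is forced
+// true and $FRA is chosen; if $FRA is NaN both the compare and the NaN test
+// are false and $FRB is chosen; otherwise the plain compare decides, exactly
+// as in the NoNaNsFPMath patterns. (The select polarity here is inferred
+// from those patterns rather than stated independently.)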
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
new file mode 100644
index 0000000..cc3a4d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -0,0 +1,447 @@
+//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Signal Processing Engine extension to
+// the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : EVXForm_1<xo, OOL, IOL, asmstr, itin> {
+ let RB = 0;
+}
+
+class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<3> crD;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-8} = crD;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<21> D;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{20} = D{0};
+ let Inst{19} = D{1};
+ let Inst{18} = D{2};
+ let Inst{17} = D{3};
+ let Inst{16} = D{4};
+ let Inst{15} = D{5};
+ let Inst{14} = D{6};
+ let Inst{13} = D{7};
+ let Inst{12} = D{8};
+ let Inst{11} = D{9};
+ let Inst{11-20} = D{0-9};
+ let Inst{21-31} = xo;
+}
+
+let Predicates = [HasSPE], isAsmParserOnly = 1 in {
+
+def EVLDD : EVXForm_D<769, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldd $RT, $dst", IIC_VecFP>;
+def EVLDW : EVXForm_D<771, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldw $RT, $dst", IIC_VecFP>;
+def EVLDH : EVXForm_D<773, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldh $RT, $dst", IIC_VecFP>;
+def EVLHHESPLAT : EVXForm_D<777, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhesplat $RT, $dst", IIC_VecFP>;
+def EVLHHOUSPLAT : EVXForm_D<781, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhousplat $RT, $dst", IIC_VecFP>;
+def EVLHHOSSPLAT : EVXForm_D<783, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhossplat $RT, $dst", IIC_VecFP>;
+def EVLWHE : EVXForm_D<785, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhe $RT, $dst", IIC_VecFP>;
+def EVLWHOU : EVXForm_D<789, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhou $RT, $dst", IIC_VecFP>;
+def EVLWHOS : EVXForm_D<791, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhos $RT, $dst", IIC_VecFP>;
+def EVLWWSPLAT : EVXForm_D<793, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwwsplat $RT, $dst", IIC_VecFP>;
+def EVLWHSPLAT : EVXForm_D<797, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhsplat $RT, $dst", IIC_VecFP>;
+
+def EVSTDD : EVXForm_D<801, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdd $RT, $dst", IIC_VecFP>;
+def EVSTDH : EVXForm_D<805, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdh $RT, $dst", IIC_VecFP>;
+def EVSTDW : EVXForm_D<803, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdw $RT, $dst", IIC_VecFP>;
+def EVSTWHE : EVXForm_D<817, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwhe $RT, $dst", IIC_VecFP>;
+def EVSTWHO : EVXForm_D<821, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwho $RT, $dst", IIC_VecFP>;
+def EVSTWWE : EVXForm_D<825, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwe $RT, $dst", IIC_VecFP>;
+def EVSTWWO : EVXForm_D<829, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwo $RT, $dst", IIC_VecFP>;
+
+def EVMRA : EVXForm_1<1220, (outs gprc:$RT), (ins gprc:$RA),
+ "evmra $RT, $RA", IIC_VecFP> {
+ let RB = 0;
+}
+
+def BRINC : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "brinc $RT, $RA, $RB", IIC_VecFP>;
+def EVABS : EVXForm_2<520, (outs gprc:$RT), (ins gprc:$RA),
+ "evabs $RT, $RA", IIC_VecFP>;
+
+def EVADDIW : EVXForm_1<514, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evaddiw $RT, $RB, $RA", IIC_VecFP>;
+def EVADDSMIAAW : EVXForm_2<1225, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddsmiaaw $RT, $RA", IIC_VecFP>;
+def EVADDSSIAAW : EVXForm_2<1217, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddssiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUSIAAW : EVXForm_2<1216, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddusiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUMIAAW : EVXForm_2<1224, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddumiaaw $RT, $RA", IIC_VecFP>;
+def EVADDW : EVXForm_1<512, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evaddw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVAND : EVXForm_1<529, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evand $RT, $RA, $RB", IIC_VecFP>;
+def EVANDC : EVXForm_1<530, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evandc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpeq $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgtu $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmplts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpltu $crD, $RA, $RB", IIC_VecFP>;
+
+def EVCNTLSW : EVXForm_2<526, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlsw $RT, $RA", IIC_VecFP>;
+def EVCNTLZW : EVXForm_2<525, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlzw $RT, $RA", IIC_VecFP>;
+
+def EVDIVWS : EVXForm_1<1222, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivws $RT, $RA, $RB", IIC_VecFP>;
+def EVDIVWU : EVXForm_1<1223, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEQV : EVXForm_1<537, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "eveqv $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEXTSB : EVXForm_2<522, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsb $RT, $RA", IIC_VecFP>;
+def EVEXTSH : EVXForm_2<523, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsh $RT, $RA", IIC_VecFP>;
+
+def EVLDDX : EVXForm_1<768, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlddx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDWX : EVXForm_1<770, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldwx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDHX : EVXForm_1<772, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldhx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHESPLATX : EVXForm_1<776, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhesplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOUSPLATX : EVXForm_1<780, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhousplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOSSPLATX : EVXForm_1<782, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhossplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHEX : EVXForm_1<784, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOUX : EVXForm_1<788, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhoux $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOSX : EVXForm_1<790, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhosx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWWSPLATX : EVXForm_1<792, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwwsplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHSPLATX : EVXForm_1<796, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhsplatx $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMERGEHI : EVXForm_1<556, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehi $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELO : EVXForm_1<557, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGEHILO : EVXForm_1<558, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehilo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELOHI : EVXForm_1<559, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelohi $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHEGSMFAA : EVXForm_1<1323, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMFAN : EVXForm_1<1451, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAA : EVXForm_1<1321, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAN : EVXForm_1<1449, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAA : EVXForm_1<1320, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAN : EVXForm_1<1448, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumian $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHESMF : EVXForm_1<1035, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFA : EVXForm_1<1067, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFAAW : EVXForm_1<1291, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFANW : EVXForm_1<1419, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMI : EVXForm_1<1033, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIA : EVXForm_1<1065, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIAAW : EVXForm_1<1289, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIANW : EVXForm_1<1417, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSF : EVXForm_1<1027, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFA : EVXForm_1<1059, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFAAW : EVXForm_1<1283, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFANW : EVXForm_1<1411, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIAAW : EVXForm_1<1281, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIANW : EVXForm_1<1409, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMI : EVXForm_1<1032, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIA : EVXForm_1<1064, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIAAW : EVXForm_1<1288, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIANW : EVXForm_1<1416, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIAAW : EVXForm_1<1280, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIANW : EVXForm_1<1408, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAA : EVXForm_1<1327, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAN : EVXForm_1<1455, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAA : EVXForm_1<1325, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAN : EVXForm_1<1453, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAA : EVXForm_1<1324, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAN : EVXForm_1<1452, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMF : EVXForm_1<1039, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFA : EVXForm_1<1071, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFAAW : EVXForm_1<1295, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFANW : EVXForm_1<1423, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMI : EVXForm_1<1037, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIA : EVXForm_1<1069, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIAAW : EVXForm_1<1293, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIANW : EVXForm_1<1421, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSF : EVXForm_1<1031, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFA : EVXForm_1<1063, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFAAW : EVXForm_1<1287, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFANW : EVXForm_1<1415, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIAAW : EVXForm_1<1285, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIANW : EVXForm_1<1413, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMI : EVXForm_1<1036, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIA : EVXForm_1<1068, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIAAW : EVXForm_1<1292, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIANW : EVXForm_1<1420, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIAAW : EVXForm_1<1284, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIANW : EVXForm_1<1412, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousianw $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVMWHSMF : EVXForm_1<1103, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMFA : EVXForm_1<1135, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMI : EVXForm_1<1101, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMIA : EVXForm_1<1133, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSF : EVXForm_1<1095, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSFA : EVXForm_1<1127, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMI : EVXForm_1<1100, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMIA : EVXForm_1<1132, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIAAW : EVXForm_1<1353, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIANW : EVXForm_1<1481, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIAAW : EVXForm_1<1345, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIANW : EVXForm_1<1473, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMI : EVXForm_1<1096, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIA : EVXForm_1<1128, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIAAW : EVXForm_1<1352, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIANW : EVXForm_1<1480, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIAAW : EVXForm_1<1344, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIANW : EVXForm_1<1472, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMF : EVXForm_1<1115, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFA : EVXForm_1<1147, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAA : EVXForm_1<1371, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAN : EVXForm_1<1499, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMI : EVXForm_1<1113, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIA : EVXForm_1<1145, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAA : EVXForm_1<1369, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAN : EVXForm_1<1497, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSF : EVXForm_1<1107, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFA : EVXForm_1<1139, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAA : EVXForm_1<1363, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAN : EVXForm_1<1491, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMI : EVXForm_1<1112, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIA : EVXForm_1<1144, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAA : EVXForm_1<1368, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAN : EVXForm_1<1496, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumian $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVNAND : EVXForm_1<542, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnand $RT, $RA, $RB", IIC_VecFP>;
+
+def EVNEG : EVXForm_2<521, (outs gprc:$RT), (ins gprc:$RA),
+ "evneg $RT, $RA", IIC_VecFP>;
+
+def EVNOR : EVXForm_1<536, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnor $RT, $RA, $RB", IIC_VecFP>;
+def EVOR : EVXForm_1<535, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evor $RT, $RA, $RB", IIC_VecFP>;
+def EVORC : EVXForm_1<539, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evorc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRLWI : EVXForm_1<554, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evrlwi $RT, $RA, $RB", IIC_VecFP>;
+def EVRLW : EVXForm_1<552, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evrlw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRNDW : EVXForm_2<524, (outs gprc:$RT), (ins gprc:$RA),
+ "evrndw $RT, $RA", IIC_VecFP>;
+
+def EVSLWI : EVXForm_1<550, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evslwi $RT, $RA, $RB", IIC_VecFP>;
+def EVSLW : EVXForm_1<548, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evslw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSPLATFI : EVXForm_2<555, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplatfi $RT, $RA", IIC_VecFP>;
+def EVSPLATI : EVXForm_2<553, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplati $RT, $RA", IIC_VecFP>;
+
+def EVSRWIS : EVXForm_1<547, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwis $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWIU : EVXForm_1<546, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwiu $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWS : EVXForm_1<545, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrws $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWU : EVXForm_1<544, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSTDDX : EVXForm_1<800, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstddx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDHX : EVXForm_1<804, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdhx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDWX : EVXForm_1<802, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdwx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHEX : EVXForm_1<816, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHOX : EVXForm_1<820, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhox $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWEX : EVXForm_1<824, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWOX : EVXForm_1<828, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwox $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSUBFSSIAAW : EVXForm_2<1219, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfssiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFSMIAAW : EVXForm_2<1227, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfsmiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUMIAAW : EVXForm_2<1226, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfumiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUSIAAW : EVXForm_2<1218, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfusiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFW : EVXForm_1<516, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsubfw $RT, $RA, $RB", IIC_VecFP>;
+def EVSUBIFW : EVXForm_1<518, (outs gprc:$RT), (ins u5imm:$RA, gprc:$RB),
+ "evsubifw $RT, $RA, $RB", IIC_VecFP>;
+def EVXOR : EVXForm_1<534, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evxor $RT, $RA, $RB", IIC_VecFP>;
+
+} // HasSPE
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
new file mode 100644
index 0000000..df1142c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -0,0 +1,1785 @@
+//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VSX extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// *********************************** NOTE ***********************************
+// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing **
+// ** which VMX and VSX instructions are lane-sensitive and which are not. **
+// ** A lane-sensitive instruction relies, implicitly or explicitly, on **
+// ** whether lanes are numbered from left to right. An instruction like **
+// ** VADDFP is not lane-sensitive, because each lane of the result vector **
+// ** relies only on the corresponding lane of the source vectors. However, **
+// ** an instruction like VMULESB is lane-sensitive, because "even" and **
+// ** "odd" lanes are different for big-endian and little-endian numbering. **
+// ** **
+// ** When adding new VMX and VSX instructions, please consider whether they **
+// ** are lane-sensitive. If so, they must be added to a switch statement **
+// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
+// ****************************************************************************
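+// Illustration of the distinction: for VADDFP on v4f32, result lane i
+// depends only on lane i of each source, so left-to-right versus
+// right-to-left lane numbering is irrelevant. For VMULESB on v16i8, the set
+// of bytes treated as "even" elements differs between big-endian and
+// little-endian element numbering, so reordering or removing swaps around it
+// changes which bytes are multiplied. That difference is what the
+// gatherVectorInstructions() switch mentioned above has to record.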
+
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsrc : RegisterOperand<VSRC> {
+ let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+ let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
+def PPCRegVSSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vssrc : RegisterOperand<VSSRC> {
+ let ParserMatchClass = PPCRegVSSRCAsmOperand;
+}
+
+// Little-endian-specific nodes.
+def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [
+ SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
+ SDTCisSameAs<0, 1>
+]>;
+
+def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
+ [SDNPHasChain, SDNPMayStore]>;
+def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
+def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
+def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
+
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
+ string asmstr, InstrItinClass itin, Intrinsic Int,
+ ValueType OutTy, ValueType InTy> {
+ let BaseName = asmbase in {
+ def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
+ let Defs = [CR6] in
+ def o : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ [(set InTy:$XT,
+ (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
+ isDOT;
+ }
+}
+
+def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
+def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
+def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
+
+let Predicates = [HasVSX] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let hasSideEffects = 0 in { // VSX instructions don't have side effects.
+let Uses = [RM] in {
+
+ // Load indexed instructions
+ let mayLoad = 1 in {
+ def LXSDX : XX1Form<31, 588,
+ (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsdx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVD2X : XX1Form<31, 844,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvd2x $XT, $src", IIC_LdStLFD,
+ [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
+
+ def LXVDSX : XX1Form<31, 332,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvdsx $XT, $src", IIC_LdStLFD, []>;
+
+ def LXVW4X : XX1Form<31, 780,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvw4x $XT, $src", IIC_LdStLFD,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
+ } // mayLoad
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def STXSDX : XX1Form<31, 716,
+ (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsdx $XT, $dst", IIC_LdStSTFD,
+ [(store f64:$XT, xoaddr:$dst)]>;
+
+ def STXVD2X : XX1Form<31, 972,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
+
+ def STXVW4X : XX1Form<31, 908,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvw4x $XT, $dst", IIC_LdStSTFD,
+ [(store v4i32:$XT, xoaddr:$dst)]>;
+
+ } // mayStore
+
+ // Add/Mul Instructions
+ let isCommutable = 1 in {
+ def XSADDDP : XX3Form<60, 32,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ def XSMULDP : XX3Form<60, 48,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+
+ def XVADDDP : XX3Form<60, 96,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+
+ def XVADDSP : XX3Form<60, 64,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+
+ def XVMULDP : XX3Form<60, 112,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+
+ def XVMULSP : XX3Form<60, 80,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ }
+
+ // Subtract Instructions
+ def XSSUBDP : XX3Form<60, 40,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xssubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+
+ def XVSUBDP : XX3Form<60, 104,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ def XVSUBSP : XX3Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+
+ // FMA Instructions
+ let BaseName = "XSMADDADP" in {
+ let isCommutable = 1 in
+ def XSMADDADP : XX3Form<60, 33,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMADDMDP : XX3Form<60, 41,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
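+  // Each VSX FMA is defined as an "A" form (the addend is tied to the
+  // destination via $XTi = $XT) plus an "M" form in which the destination
+  // instead takes the role of a multiplicand. Only the A form carries the
+  // ISel pattern; the M form is tagged IsVSXFMAAlt and related to it through
+  // BaseName/AltVSXFMARel so that a later pass can substitute whichever form
+  // avoids a register copy. (That purpose is inferred from these attributes;
+  // the groups below all follow the same scheme.)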
+
+ let BaseName = "XSMSUBADP" in {
+ let isCommutable = 1 in
+ def XSMSUBADP : XX3Form<60, 49,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMSUBMDP : XX3Form<60, 57,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMADDADP" in {
+ let isCommutable = 1 in
+ def XSNMADDADP : XX3Form<60, 161,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMADDMDP : XX3Form<60, 169,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMSUBADP" in {
+ let isCommutable = 1 in
+ def XSNMSUBADP : XX3Form<60, 177,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMSUBMDP : XX3Form<60, 185,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDADP" in {
+ let isCommutable = 1 in
+ def XVMADDADP : XX3Form<60, 97,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMDP : XX3Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDASP" in {
+ let isCommutable = 1 in
+ def XVMADDASP : XX3Form<60, 65,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMSP : XX3Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBADP" in {
+ let isCommutable = 1 in
+ def XVMSUBADP : XX3Form<60, 113,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMDP : XX3Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBASP" in {
+ let isCommutable = 1 in
+ def XVMSUBASP : XX3Form<60, 81,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMSP : XX3Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDADP" in {
+ let isCommutable = 1 in
+ def XVNMADDADP : XX3Form<60, 225,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMDP : XX3Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDASP" in {
+ let isCommutable = 1 in
+ def XVNMADDASP : XX3Form<60, 193,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMSP : XX3Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBADP" in {
+ let isCommutable = 1 in
+ def XVNMSUBADP : XX3Form<60, 241,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMDP : XX3Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBASP" in {
+ let isCommutable = 1 in
+ def XVNMSUBASP : XX3Form<60, 209,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMSP : XX3Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ // Division Instructions
+ def XSDIVDP : XX3Form<60, 56,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ def XSSQRTDP : XX2Form<60, 75,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xssqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set f64:$XT, (fsqrt f64:$XB))]>;
+
+ def XSREDP : XX2Form<60, 90,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsredp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfre f64:$XB))]>;
+ def XSRSQRTEDP : XX2Form<60, 74,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+ def XSTDIVDP : XX3Form_1<60, 61,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSTSQRTDP : XX2Form_1<60, 106,
+ (outs crrc:$crD), (ins vsfrc:$XB),
+ "xstsqrtdp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVDIVDP : XX3Form<60, 120,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ def XVDIVSP : XX3Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+
+ def XVSQRTDP : XX2Form<60, 203,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ def XVSQRTSP : XX2Form<60, 139,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVREDP : XX2Form<60, 218,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvredp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
+ def XVRESP : XX2Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvresp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
+
+ def XVRSQRTEDP : XX2Form<60, 202,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
+ def XVRSQRTESP : XX2Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
+
+ // Compare Instructions
+ def XSCMPODP : XX3Form_1<60, 43,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSCMPUDP : XX3Form_1<60, 35,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
+ defm XVCMPEQSP : XX3Form_Rcr<60, 67,
+ "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>;
+ defm XVCMPGEDP : XX3Form_Rcr<60, 115,
+ "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ int_ppc_vsx_xvcmpgedp, v2i64, v2f64>;
+ defm XVCMPGESP : XX3Form_Rcr<60, 83,
+ "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ int_ppc_vsx_xvcmpgesp, v4i32, v4f32>;
+ defm XVCMPGTDP : XX3Form_Rcr<60, 107,
+ "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>;
+ defm XVCMPGTSP : XX3Form_Rcr<60, 75,
+ "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>;
+
+ // Move Instructions
+ def XSABSDP : XX2Form<60, 345,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fabs f64:$XB))]>;
+ def XSNABSDP : XX2Form<60, 361,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
+ def XSNEGDP : XX2Form<60, 377,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnegdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg f64:$XB))]>;
+ def XSCPSGNDP : XX3Form<60, 176,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
+
+ def XVABSDP : XX2Form<60, 473,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fabs v2f64:$XB))]>;
+
+ def XVABSSP : XX2Form<60, 409,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fabs v4f32:$XB))]>;
+
+ def XVCPSGNDP : XX3Form<60, 240,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
+ def XVCPSGNSP : XX3Form<60, 208,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
+
+ def XVNABSDP : XX2Form<60, 489,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
+ def XVNABSSP : XX2Form<60, 425,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
+
+ def XVNEGDP : XX2Form<60, 505,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg v2f64:$XB))]>;
+ def XVNEGSP : XX2Form<60, 441,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg v4f32:$XB))]>;
+
+ // Conversion Instructions
+ def XSCVDPSP : XX2Form<60, 265,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXDS : XX2Form<60, 344,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+ def XSCVDPSXWS : XX2Form<60, 88,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+ def XSCVDPUXDS : XX2Form<60, 328,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+ def XSCVDPUXWS : XX2Form<60, 72,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+ def XSCVSPDP : XX2Form<60, 329,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvspdp $XT, $XB", IIC_VecFP, []>;
+ def XSCVSXDDP : XX2Form<60, 376,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvsxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfid f64:$XB))]>;
+ def XSCVUXDDP : XX2Form<60, 360,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvuxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfidu f64:$XB))]>;
+
+ def XVCVDPSP : XX2Form<60, 393,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXDS : XX2Form<60, 472,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
+ def XVCVDPSXWS : XX2Form<60, 216,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXDS : XX2Form<60, 456,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
+ def XVCVDPUXWS : XX2Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVSPDP : XX2Form<60, 457,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXDS : XX2Form<60, 408,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXWS : XX2Form<60, 152,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXDS : XX2Form<60, 392,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXWS : XX2Form<60, 136,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDDP : XX2Form<60, 504,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
+ def XVCVSXDSP : XX2Form<60, 440,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWSP : XX2Form<60, 184,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDDP : XX2Form<60, 488,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
+ def XVCVUXDSP : XX2Form<60, 424,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWDP : XX2Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+
+ // Rounding Instructions
+ def XSRDPI : XX2Form<60, 73,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpi $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (frnd f64:$XB))]>;
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XSRDPIM : XX2Form<60, 121,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpim $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ffloor f64:$XB))]>;
+ def XSRDPIP : XX2Form<60, 105,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpip $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fceil f64:$XB))]>;
+ def XSRDPIZ : XX2Form<60, 89,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpiz $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ftrunc f64:$XB))]>;
+
+ def XVRDPI : XX2Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpi $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (frnd v2f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRDPIM : XX2Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpim $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ def XVRDPIP : XX2Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpip $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ def XVRDPIZ : XX2Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpiz $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+
+ def XVRSPI : XX2Form<60, 137,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspi $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (frnd v4f32:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ def XVRSPIM : XX2Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspim $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ def XVRSPIP : XX2Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspip $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ def XVRSPIZ : XX2Form<60, 153,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspiz $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+
+ // Max/Min Instructions
+ let isCommutable = 1 in {
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
+ } // isCommutable
+} // Uses = [RM]
+
+ // Logical Instructions
+ let isCommutable = 1 in
+ def XXLAND : XX3Form<60, 130,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxland $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>;
+ def XXLANDC : XX3Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlandc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA,
+ (vnot_ppc v4i32:$XB)))]>;
+ let isCommutable = 1 in {
+ def XXLNOR : XX3Form<60, 162,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA,
+ v4i32:$XB)))]>;
+ def XXLOR : XX3Form<60, 146,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XXLORf: XX3Form<60, 146,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLXOR : XX3Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
+ } // isCommutable
+
+ // Permutation Instructions
+ def XXMRGHW : XX3Form<60, 18,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
+ def XXMRGLW : XX3Form<60, 50,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
+
+ def XXPERMDI : XX3Form_2<60, 10,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ def XXSEL : XX4Form<60, 3,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+ "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
+
+ def XXSLDWI : XX3Form_2<60, 2,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
+ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+ def XXSPLTW : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+} // hasSideEffects
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+
+ def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+ "#SELECT_CC_VSRC",
+ []>;
+ def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+ "#SELECT_VSRC",
+ [(set v2f64:$dst,
+ (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+ def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSFRC",
+ []>;
+ def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+ "#SELECT_VSFRC",
+ [(set f64:$dst,
+ (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst),
+ (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSSRC",
+ []>;
+ def SELECT_VSSRC: Pseudo<(outs f4rc:$dst),
+ (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
+ "#SELECT_VSSRC",
+ [(set f32:$dst,
+ (select i1:$cond, f32:$T, f32:$F))]>;
+} // usesCustomInserter
+} // AddedComplexity
+
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
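+// Note on the DM values used above: xxpermdi builds its result from one
+// doubleword of $XA and one of $XB, with the two DM bits selecting which.
+// Hence DM=0 gives the "high" merge (xxmrghd), DM=3 the "low" merge
+// (xxmrgld), and DM=2 with a repeated source exchanges the two doublewords
+// (xxswapd); the splat aliases simply merge a source with itself.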
+
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+
+let Predicates = [IsBigEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
+
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+}
+
+let Predicates = [IsLittleEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
+ (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
+
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
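+// Note: sub_64 always names the same physical doubleword of the VSR (the one
+// that overlaps the scalar FPR), so which vector element it holds depends on
+// endianness. That is why these little-endian patterns extract element 1
+// directly but need the xxpermdi swap for element 0, mirroring the
+// big-endian block above.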
+}
+
+// Additional fnmsub patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
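+// Operand mapping for the patterns above: the A-form nmsub instructions
+// compute XT = -(XA*XB - XT) with XT as the tied accumulator, so selecting
+// (fma (fneg $A), $C, $B) as (XSNMSUBADP $B, $C, $A) yields
+// -($C*$A - $B) == -$A*$C + $B, i.e. the addend $B goes into the XT slot.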
+
+def : Pat<(v2f64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2i64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2i64 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (bitconvert v1i128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v1i128 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+// Sign extension patterns
+// To extend "in place" from v2i32 to v2i64, we have input data like:
+// | undef | i32 | undef | i32 |
+// but xvcvsxwdp expects the input in big-endian format:
+// | i32 | undef | i32 | undef |
+// so we need to shift everything to the left by one i32 (word) before
+// the conversion.
+def : Pat<(sext_inreg v2i64:$C, v2i32),
+ (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
+def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
+ (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
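+// A scalar view of the two patterns above (sketch only): for each 64-bit
+// lane x, the first computes (i64)(i32)x and the second (f64)(i32)x.
+// xvcvsxwdp converts signed words directly to doubles, which is why the
+// second pattern can drop the XVCVDPSXDS round trip.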
+
+// Loads.
+def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+// Stores.
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+
+// Permutes.
+def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
+
+// Selects.
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+ (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+ (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
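+// How the i1 selectcc conditions above reduce to CR-bit logic: an i1 "true"
+// is -1 under the signed interpretation, so for single condition bits
+// SETLT is $lhs & ~$rhs (CRANDC), the unsigned forms swap the operands,
+// SETLE/SETGE become CRORC, SETEQ is CREQV (xnor) and SETNE is CRXOR.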
+
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+ (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+ (XVDIVDP $A, $B)>;
+
+// Reciprocal estimate
+def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
+ (XVRESP $A)>;
+def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
+ (XVREDP $A)>;
+
+// Reciprocal square root estimate
+def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
+ (XVRSQRTESP $A)>;
+def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
+ (XVRSQRTEDP $A)>;
+
+} // AddedComplexity
+} // HasVSX
+
+// The following VSX instructions were introduced in Power ISA 2.07
+/* FIXME: If the operands are v2i64, these patterns will not match.
+   We should define new patterns, or otherwise match the same patterns,
+   when the elements are larger than i32.
+*/
+def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
+let Predicates = [HasP8Vector] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+ let isCommutable = 1 in {
+ def XXLEQV : XX3Form<60, 186,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
+ def XXLNAND : XX3Form<60, 178,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
+ v4i32:$XB)))]>;
+ } // isCommutable
+
+ def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
+ (XXLEQV $A, $B)>;
+
+ def XXLORC : XX3Form<60, 170,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
+
+ // VSX scalar loads introduced in ISA 2.07
+ let mayLoad = 1 in {
+ def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
+ "lxsspx $XT, $src", IIC_LdStLFD,
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwax $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwzx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+ } // mayLoad
+
+ // VSX scalar stores introduced in ISA 2.07
+ let mayStore = 1 in {
+ def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
+ "stxsspx $XT, $dst", IIC_LdStSTFD,
+ [(store f32:$XT, xoaddr:$dst)]>;
+ def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsiwx $XT, $dst", IIC_LdStSTFD,
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+ } // mayStore
+
+ def : Pat<(f64 (extloadf32 xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+ def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, VSFRC)>;
+
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+ def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+ // VSX Elementary Scalar FP arithmetic (SP)
+ let isCommutable = 1 in {
+ def XSADDSP : XX3Form<60, 0,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
+ def XSMULSP : XX3Form<60, 16,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
+ } // isCommutable
+
+ def XSDIVSP : XX3Form<60, 24,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
+ def XSRESP : XX2Form<60, 26,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsresp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfre f32:$XB))]>;
+ def XSSQRTSP : XX2Form<60, 11,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xssqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set f32:$XT, (fsqrt f32:$XB))]>;
+ def XSRSQRTESP : XX2Form<60, 10,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
+ def XSSUBSP : XX3Form<60, 8,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xssubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+
+ // FMA Instructions
+ let BaseName = "XSMADDASP" in {
+ let isCommutable = 1 in
+ def XSMADDASP : XX3Form<60, 1,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMADDMSP : XX3Form<60, 9,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSMSUBASP" in {
+ let isCommutable = 1 in
+ def XSMSUBASP : XX3Form<60, 17,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMSUBMSP : XX3Form<60, 25,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMADDASP" in {
+ let isCommutable = 1 in
+ def XSNMADDASP : XX3Form<60, 129,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMADDMSP : XX3Form<60, 137,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMSUBASP" in {
+ let isCommutable = 1 in
+ def XSNMSUBASP : XX3Form<60, 145,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMSUBMSP : XX3Form<60, 153,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ // Single Precision Conversions (FP <-> INT)
+ def XSCVSXDSP : XX2Form<60, 312,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvsxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+ def XSCVUXDSP : XX2Form<60, 296,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvuxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
+
+ // Conversions between vector and scalar single precision
+ def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
+ "xscvdpspn $XT, $XB", IIC_VecFP, []>;
+ def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
+ "xscvspdpn $XT, $XB", IIC_VecFP, []>;
+
+} // AddedComplexity = 400
+} // HasP8Vector
+
+let Predicates = [HasDirectMove, HasVSX] in {
+ // VSX direct move instructions
+ def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ Requires<[In64BitMode]>;
+ def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+ def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ Requires<[In64BitMode]>;
+ def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+ def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+} // HasDirectMove, HasVSX
+
+/* Direct moves of various widths from GPRs into VSRs. Each move lines
+   the value up into element 0 (both BE and LE). Specifically, entities smaller
+   than a doubleword are shifted left and then moved for BE. For LE, they are
+   moved first and then swapped into the least significant element of the VSR.
+*/
+def MovesToVSR {
+ dag BE_BYTE_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+ dag BE_HALF_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+ dag BE_WORD_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+ dag BE_DWORD_0 = (MTVSRD $A);
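+  // A sketch of the BE dags above: RLDICR by 56/48/32 left-justifies the
+  // byte/halfword/word within a doubleword, so after MTVSRD the value sits
+  // in the most significant bits of doubleword 0, i.e. in element 0 on BE.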
+
+ dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
+ dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ LE_MTVSRW, sub_64));
+ dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
+ dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ BE_DWORD_0, sub_64));
+ dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
+}
+
+/* Patterns for extracting elements out of vectors. Integer elements are
+   extracted using direct move operations. Elements whose indices are not
+   known at compile time are extracted using the various _VARIABLE_ patterns.
+   The numbering of the DAGs is for LE, but when used on BE, the correct
+   LE element can simply be used (i.e. LE_BYTE_2 == BE_BYTE_13).
+*/
+def VectorExtractions {
+ // Doubleword extraction
+ dag LE_DWORD_0 =
+ (MFVSRD
+ (EXTRACT_SUBREG
+ (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
+ (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
+ dag LE_DWORD_1 = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+
+ // Word extraction
+ dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64));
+ dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
+ dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+ dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
+
+ // Halfword extraction
+ dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
+ dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
+ dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
+ dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
+ dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
+ dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
+ dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
+ dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
+
+ // Byte extraction
+ dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
+ dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
+ dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
+ dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
+ dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
+ dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
+ dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
+ dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
+ dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
+ dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
+ dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
+ dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
+ dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
+ dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
+ dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
+ dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));
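+  // A sketch of the RLDICL encodings above: (RLDICL dword, SH, 64-N) rotates
+  // left by SH and keeps the low N bits, so with N = 8 or 16 and SH chosen
+  // per element it right-justifies the requested byte or halfword of the
+  // doubleword (assumed reading of the immediates, matching the LE element
+  // numbering used here).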
+
+ /* Variable element number (BE and LE patterns must be specified separately)
+ This is a rather involved process.
+
+ Conceptually, this is how the move is accomplished:
+ 1. Identify which doubleword contains the element
+     2. Shift within the VMX register so that the desired doubleword is
+        lined up for the MFVSRD
+ 3. Perform the move so that the element (along with some extra stuff)
+ is in the GPR
+ 4. Right shift within the GPR so that the element is right-justified
+
+ Of course, the index is an element number which has a different meaning
+ on LE/BE so the patterns have to be specified separately.
+
+ Note: The final result will be the element right-justified with high
+ order bits being arbitrarily defined (namely, whatever was in the
+ vector register to the left of the value originally).
+ */
+
+ /* LE variable byte
+ Number 1. above:
+ - For elements 0-7, we shift left by 8 bytes since they're on the right
+ - For elements 8-15, we need not shift (shift left by zero bytes)
+ This is accomplished by inverting the bits of the index and AND-ing
+ with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
+ */
+ dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-7 (8-15 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 8 as we need to shift right by the number of bits, not bytes
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT),
+ sub_32);
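+  /* Putting the four steps together, LE_VARIABLE_BYTE is roughly equivalent
+     to the scalar computation (sketch only; the high-order result bits are
+     left undefined):
+       dword  = doubleword of $S containing element $Idx   // steps 1-3
+       result = dword >> (($Idx & 7) * 8)                  // step 4
+     The halfword, word and doubleword variants below follow the same scheme
+     with different masks and shift multipliers.
+  */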
+
+ /* LE variable halfword
+ Number 1. above:
+     - For elements 0-3, we shift left by 8 bytes since they're on the right
+ - For elements 4-7, we need not shift (shift left by zero bytes)
+ Similarly to the byte pattern, we invert the bits of the index, but we
+ AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
+ Of course, the shift is still by 8 bytes, so we must multiply by 2.
+ */
+ dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-3 (4-7 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 16 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT),
+ sub_32);
+
+ /* LE variable word
+ Number 1. above:
+     - For elements 0-1, we shift left by 8 bytes since they're on the right
+ - For elements 2-3, we need not shift
+ */
+ dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-1 (2-3 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 32 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT),
+ sub_32);
+
+ /* LE variable doubleword
+ Number 1. above:
+     - For element 0, we shift left by 8 bytes since it's on the right
+ - For element 1, we need not shift
+ */
+ dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ // - Number 4. is not needed for the doubleword as the value is 64-bits
+ dag LE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* LE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+     - Convert the 32-bit float to double-precision register format (xscvspdpn)
+ */
+ dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61));
+ dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
+ dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);
+
+ /* LE variable double
+ Same as the LE doubleword except there is no move.
+ */
+ dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
+ (COPY_TO_REGCLASS $S, VRRC),
+ LE_VDWORD_PERM_VEC);
+ dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);
+
+ /* BE variable byte
+ The algorithm here is the same as the LE variable byte except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x8
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-7
+ */
+ dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8));
+ dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC);
+ dag BE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT),
+ sub_32);
+
+ /* BE variable halfword
+ The algorithm here is the same as the LE variable halfword except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x4 and multiply by 2
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-3
+ */
+ dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62));
+ dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC);
+ dag BE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT),
+ sub_32);
+
+ /* BE variable word
+ The algorithm is the same as the LE variable word except:
+ - The shift in the VMX register happens for opposite element numbers
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-1
+ */
+ dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61));
+ dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC);
+ dag BE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT),
+ sub_32);
+
+ /* BE variable doubleword
+ Same as the LE doubleword except we shift in the VMX register for opposite
+ element indices.
+ */
+ dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60));
+ dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC);
+ dag BE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* BE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+     - Convert the 32-bit float to double-precision register format (xscvspdpn)
+ */
+ dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61));
+ dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
+ dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);
+
+ /* BE variable double
+ Same as the BE doubleword except there is no move.
+ */
+ dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
+ (COPY_TO_REGCLASS $S, VRRC),
+ BE_VDWORD_PERM_VEC);
+ dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
+}
+
+// v4f32 scalar <-> vector conversions (BE)
+let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XSCVDPSPN $A))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN $S))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
+} // IsBigEndian, HasP8Vector
+
+// Variable index vector_extract for v2f64 does not require P8Vector
+let Predicates = [IsBigEndian, HasVSX] in
+ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
+
+let Predicates = [IsBigEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (BE)
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
+
+ // v8i16 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_WORD)>;
+
+ // v2i64 scalar <-> vector conversions (BE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
+} // IsBigEndian, HasDirectMove
+
+// v4f32 scalar <-> vector conversions (LE)
+let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN $S))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
+} // IsLittleEndian, HasP8Vector
+
+// Variable index vector_extract for v2f64 does not require P8Vector
+let Predicates = [IsLittleEndian, HasVSX] in
+ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+
+let Predicates = [IsLittleEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (LE)
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 MovesToVSR.LE_WORD_0)>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 MovesToVSR.LE_DWORD_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
+
+ // v8i16 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_WORD)>;
+
+ // v2i64 scalar <-> vector conversions (LE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
+} // IsLittleEndian, HasDirectMove
+
+let Predicates = [HasDirectMove, HasVSX] in {
+// bitconvert f32 -> i32
+// (convert to 32-bit fp single, shift right 1 word, move to GPR)
+def : Pat<(i32 (bitconvert f32:$S)),
+ (i32 (MFVSRWZ (EXTRACT_SUBREG
+ (XXSLDWI (XSCVDPSPN $S),(XSCVDPSPN $S), 3),
+ sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert to 64-bit fp single)
+def : Pat<(f32 (bitconvert i32:$A)),
+ (f32 (XSCVSPDPN
+ (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
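+// Note: a scalar f32 is held in a VSR in double-precision register format,
+// so its IEEE single-precision bit image cannot simply be moved. The
+// xscvdpspn/xscvspdpn pair converts between the two representations, and the
+// xxsldwi lines the 32-bit image up with the word moved via mfvsrwz/mtvsrd.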
+
+// bitconvert f64 -> i64
+// (move to GPR, nothing else needed)
+def : Pat<(i64 (bitconvert f64:$S)),
+ (i64 (MFVSRD $S))>;
+
+// bitconvert i64 -> f64
+// (move to FPR, nothing else needed)
+def : Pat<(f64 (bitconvert i64:$S)),
+ (f64 (MTVSRD $S))>;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
new file mode 100644
index 0000000..e3a35d5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -0,0 +1,233 @@
+//===-------- PPCLoopDataPrefetch.cpp - Loop Data Prefetching Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Loop Data Prefetching Pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-loop-data-prefetch"
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+// By default, only the addresses of loads are prefetched; this option also
+// enables prefetching the addresses of stores.
+static cl::opt<bool>
+PrefetchWrites("ppc-loop-prefetch-writes", cl::Hidden, cl::init(false),
+ cl::desc("Prefetch write addresses"));
+
+// This seems like a reasonable default for the BG/Q (this pass is enabled, by
+// default, only on the BG/Q).
+static cl::opt<unsigned>
+PrefDist("ppc-loop-prefetch-distance", cl::Hidden, cl::init(300),
+ cl::desc("The loop prefetch distance"));
+
+static cl::opt<unsigned>
+CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
+ cl::desc("The loop prefetch cache line size"));
+
+namespace llvm {
+ void initializePPCLoopDataPrefetchPass(PassRegistry&);
+}
+
+namespace {
+
+ class PPCLoopDataPrefetch : public FunctionPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ PPCLoopDataPrefetch() : FunctionPass(ID) {
+ initializePPCLoopDataPrefetchPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ // FIXME: For some reason, preserving SE here breaks LSR (even if
+ // this pass changes nothing).
+ // AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+ bool runOnLoop(Loop *L);
+
+ private:
+ AssumptionCache *AC;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ const DataLayout *DL;
+ };
+}
+
+char PPCLoopDataPrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
+ "PPC Loop Data Prefetch", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
+ "PPC Loop Data Prefetch", false, false)
+
+FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPrefetch(); }
+
+bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DL = &F.getParent()->getDataLayout();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool MadeChange = false;
+
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
+ bool MadeChange = false;
+
+ // Only prefetch in the inner-most loop
+ if (!L->empty())
+ return MadeChange;
+
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
+ // Calculate the number of iterations ahead to prefetch
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+
+ // If the loop already has prefetches, then assume that the user knows
+ // what he or she is doing and don't add any more.
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J)
+ if (CallInst *CI = dyn_cast<CallInst>(J))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::prefetch)
+ return MadeChange;
+
+ Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
+ }
+ unsigned LoopSize = Metrics.NumInsts;
+ if (!LoopSize)
+ LoopSize = 1;
+
+ unsigned ItersAhead = PrefDist/LoopSize;
+ if (!ItersAhead)
+ ItersAhead = 1;
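+  // With the default prefetch distance of 300, a 25-instruction loop body
+  // would therefore prefetch addresses 12 iterations ahead.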
+
+ SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ if (!PrefetchWrites) continue;
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec)
+ continue;
+
+ // We don't want to double prefetch individual cache lines. If this load
+ // is known to be within one cache line of some other load that has
+ // already been prefetched, then don't prefetch this one as well.
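+      // For example, with a 64-byte line, loads of a[i] and a[i+1] for
+      // 8-byte elements are covered by a single prefetch.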
+ bool DupPref = false;
+ for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>,
+ 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end();
+ K != KE; ++K) {
+ const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second);
+ if (const SCEVConstant *ConstPtrDiff =
+ dyn_cast<SCEVConstant>(PtrDiff)) {
+ int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
+ if (PD < (int64_t) CacheLineSize) {
+ DupPref = true;
+ break;
+ }
+ }
+ }
+ if (DupPref)
+ continue;
+
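+      // Compute the address this access will touch ItersAhead iterations
+      // from now: the current recurrence plus ItersAhead * stride.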
+ const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
+ SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
+ LSCEVAddRec->getStepRecurrence(*SE)));
+ if (!isSafeToExpand(NextLSCEV, *SE))
+ continue;
+
+ PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
+
+ Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
+ SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
+ Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
+
+ IRBuilder<> Builder(MemI);
+ Module *M = (*I)->getParent()->getParent();
+ Type *I32 = Type::getInt32Ty((*I)->getContext());
+ Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
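+      // llvm.prefetch takes (address, rw, locality, cache type): stores are
+      // marked as writes (rw = 1), locality 3 requests maximal temporal
+      // reuse, and cache type 1 selects the data cache.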
+ Builder.CreateCall(
+ PrefetchFunc,
+ {PrefPtrValue,
+ ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
+ ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
+
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
new file mode 100644
index 0000000..5e18826
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -0,0 +1,437 @@
+//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to prepare loops for pre-increment addressing
+// modes. Additional PHIs are created for loop induction variables used by
+// load/store instructions so that the pre-increment forms can be used.
+// Generically, this means transforming loops like this:
+// for (int i = 0; i < n; ++i)
+// array[i] = c;
+// to look like this:
+//   T *p = &array[-1];
+// for (int i = 0; i < n; ++i)
+// *++p = c;
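+// The backend can then select update-form (pre-increment) memory
+// instructions such as lwzu and stwu, which advance the base register as
+// part of the access.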
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-loop-preinc-prep"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+using namespace llvm;
+
+// By default, we limit this to creating 16 PHIs (which is a little over half
+// of the allocatable register set).
+static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
+ cl::Hidden, cl::init(16),
+ cl::desc("Potential PHI threshold for PPC preinc loop prep"));
+
+namespace llvm {
+ void initializePPCLoopPreIncPrepPass(PassRegistry&);
+}
+
+namespace {
+
+ class PPCLoopPreIncPrep : public FunctionPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) {
+ initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ }
+ PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ bool runOnLoop(Loop *L);
+ void simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L);
+
+ private:
+ PPCTargetMachine *TM;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ bool PreserveLCSSA;
+ };
+}
+
+char PPCLoopPreIncPrep::ID = 0;
+static const char *name = "Prepare loop for pre-inc. addressing modes";
+INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+
+FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
+ return new PPCLoopPreIncPrep(TM);
+}
+
+namespace {
+ struct BucketElement {
+ BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
+
+ const SCEVConstant *Offset;
+ Instruction *Instr;
+ };
+
+ struct Bucket {
+ Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
+ Elements(1, BucketElement(I)) {}
+
+ const SCEV *BaseSCEV;
+ SmallVector<BucketElement, 16> Elements;
+ };
+}
+
+static bool IsPtrInBounds(Value *BasePtr) {
+ Value *StrippedBasePtr = BasePtr;
+ while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
+ StrippedBasePtr = BC->getOperand(0);
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
+ return GEP->isInBounds();
+
+ return false;
+}
+
+static Value *GetPointerOperand(Value *MemI) {
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
+ return LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
+ return SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
+ return IMemI->getArgOperand(0);
+ }
+
+ return 0;
+}
+
+bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ bool MadeChange = false;
+
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
+
+ return MadeChange;
+}
+
+bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
+ bool MadeChange = false;
+
+ // Only prep. the inner-most loop
+ if (!L->empty())
+ return MadeChange;
+
+ DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
+
+ BasicBlock *Header = L->getHeader();
+
+ const PPCSubtarget *ST =
+ TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
+
+ unsigned HeaderLoopPredCount =
+ std::distance(pred_begin(Header), pred_end(Header));
+
+ // Collect buckets of comparable addresses used by loads and stores.
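+  // Accesses whose addresses differ from a bucket's base by a compile-time
+  // constant join that bucket; each bucket is later rewritten to share one
+  // pointer PHI.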
+ SmallVector<Bucket, 16> Buckets;
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+ J != JE; ++J) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+ MemI = IMemI;
+ PtrValue = IMemI->getArgOperand(0);
+ } else continue;
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ // There are no update forms for Altivec vector load/stores.
+ if (ST && ST->hasAltivec() &&
+ PtrValue->getType()->getPointerElementType()->isVectorTy())
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
+ if (LARSCEV->getLoop() != L)
+ continue;
+ } else {
+ continue;
+ }
+
+ bool FoundBucket = false;
+ for (auto &B : Buckets) {
+ const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
+ if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
+ B.Elements.push_back(BucketElement(CDiff, MemI));
+ FoundBucket = true;
+ break;
+ }
+ }
+
+ if (!FoundBucket) {
+ if (Buckets.size() == MaxVars)
+ return MadeChange;
+ Buckets.push_back(Bucket(LSCEV, MemI));
+ }
+ }
+ }
+
+ if (Buckets.empty())
+ return MadeChange;
+
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+ // If there is no loop predecessor, or the loop predecessor's terminator
+ // returns a value (which might contribute to determining the loop's
+ // iteration space), insert a new preheader for the loop.
+ if (!LoopPredecessor ||
+ !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
+ LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ if (LoopPredecessor)
+ MadeChange = true;
+ }
+ if (!LoopPredecessor)
+ return MadeChange;
+
+ DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
+
+ SmallSet<BasicBlock *, 16> BBChanged;
+ for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
+ // The base address of each bucket is transformed into a phi and the others
+ // are rewritten as offsets of that variable.
+
+ // We have a choice now of which instruction's memory operand we use as the
+ // base for the generated PHI. Always picking the first instruction in each
+ // bucket does not work well, specifically because that instruction might
+ // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
+ // the choice is somewhat arbitrary, because the backend will happily
+ // generate direct offsets from both the pre-incremented and
+ // post-incremented pointer values. Thus, we'll pick the first non-prefetch
+ // instruction in each bucket, and adjust the recurrence and other offsets
+ // accordingly.
+ for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
+ if (II->getIntrinsicID() == Intrinsic::prefetch)
+ continue;
+
+ // If we'd otherwise pick the first element anyway, there's nothing to do.
+ if (j == 0)
+ break;
+
+ // If our chosen element has no offset from the base pointer, there's
+ // nothing to do.
+ if (!Buckets[i].Elements[j].Offset ||
+ Buckets[i].Elements[j].Offset->isZero())
+ break;
+
+ const SCEV *Offset = Buckets[i].Elements[j].Offset;
+ Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
+ for (auto &E : Buckets[i].Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
+ break;
+ }
+
+ const SCEVAddRecExpr *BasePtrSCEV =
+ cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
+ if (!BasePtrSCEV->isAffine())
+ continue;
+
+ DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+ assert(BasePtrSCEV->getLoop() == L &&
+ "AddRec for the wrong loop?");
+
+ // The instruction corresponding to the Bucket's BaseSCEV must be the first
+ // in the vector of elements.
+ Instruction *MemI = Buckets[i].Elements.begin()->Instr;
+ Value *BasePtr = GetPointerOperand(MemI);
+ assert(BasePtr && "No pointer operand");
+
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
+ Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
+
+ const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
+ if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
+ continue;
+
+ const SCEVConstant *BasePtrIncSCEV =
+ dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
+ if (!BasePtrIncSCEV)
+ continue;
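+    // Back the start up by one stride: the new pointer PHI is advanced by
+    // the increment before its first use, so it must begin one step early.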
+ BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
+ if (!isSafeToExpand(BasePtrStartSCEV, *SE))
+ continue;
+
+ DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+ PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+ MemI->hasName() ? MemI->getName() + ".phi" : "",
+ Header->getFirstNonPHI());
+
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
+ Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+ LoopPredecessor->getTerminator());
+
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to add it the right number of times.
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ if (*PI != LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+ }
+
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
+ PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+ PI != PE; ++PI) {
+ if (*PI == LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(PtrInc, *PI);
+ }
+
+ Instruction *NewBasePtr;
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
+ PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
+ else
+ NewBasePtr = PtrInc;
+
+ if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
+ BBChanged.insert(IDel->getParent());
+ BasePtr->replaceAllUsesWith(NewBasePtr);
+ RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
+
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+    NewPtrs.insert(NewBasePtr);
+
+ for (auto I = std::next(Buckets[i].Elements.begin()),
+ IE = Buckets[i].Elements.end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->Instr);
+ assert(Ptr && "No pointer operand");
+ if (NewPtrs.count(Ptr))
+ continue;
+
+ Instruction *RealNewPtr;
+ if (!I->Offset || I->Offset->getValue()->isZero()) {
+ RealNewPtr = NewBasePtr;
+ } else {
+ Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+ if (PtrIP && isa<Instruction>(NewBasePtr) &&
+ cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+          PtrIP = nullptr;
+        else if (PtrIP && isa<PHINode>(PtrIP))
+ PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
+ else if (!PtrIP)
+ PtrIP = I->Instr;
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, I->Offset->getValue(),
+ I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
+ if (!PtrIP)
+ NewPtr->insertAfter(cast<Instruction>(PtrInc));
+ NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+ RealNewPtr = NewPtr;
+ }
+
+ if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+ BBChanged.insert(IDel->getParent());
+
+ Instruction *ReplNewPtr;
+ if (Ptr->getType() != RealNewPtr->getType()) {
+ ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+ Ptr->hasName() ? Ptr->getName() + ".cast" : "");
+ ReplNewPtr->insertAfter(RealNewPtr);
+ } else
+ ReplNewPtr = RealNewPtr;
+
+ Ptr->replaceAllUsesWith(ReplNewPtr);
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+
+ NewPtrs.insert(RealNewPtr);
+ }
+
+ MadeChange = true;
+ }
+
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I) {
+ if (BBChanged.count(*I))
+ DeleteDeadPHIs(*I);
+ }
+
+ return MadeChange;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 0000000..44a692d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,219 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+ return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ const TargetMachine &TM = AP.TM;
+ Mangler *Mang = AP.Mang;
+ const DataLayout &DL = AP.getDataLayout();
+ MCContext &Ctx = AP.OutContext;
+ bool isDarwin = TM.getTargetTriple().isOSDarwin();
+
+ SmallString<128> Name;
+ StringRef Suffix;
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) {
+ if (isDarwin)
+ Suffix = "$stub";
+ } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
+ Suffix = "$non_lazy_ptr";
+
+ if (!Suffix.empty())
+ Name += DL.getPrivateGlobalPrefix();
+
+ unsigned PrefixLen = Name.size();
+
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ TM.getNameWithPrefix(Name, GV, *Mang);
+ }
+
+ unsigned OrigLen = Name.size() - PrefixLen;
+
+ Name += Suffix;
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
+ StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
+
+  // If the target flags on the operand change the name of the symbol, do that
+ // before we return the symbol.
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) {
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI(AP).getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.getOrCreateSymbol(OrigName), false);
+ }
+ return Sym;
+ }
+
+ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+ // then add the suffix.
+ if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+ MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
+ MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+
+ if (!StubSym.getPointer()) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+
+ return Sym;
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ AsmPrinter &Printer, bool isDarwin) {
+ MCContext &Ctx = Printer.OutContext;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
+
+ switch (access) {
+ case PPCII::MO_TPREL_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_TPREL_LO;
+ break;
+ case PPCII::MO_TPREL_HA:
+ RefKind = MCSymbolRefExpr::VK_PPC_TPREL_HA;
+ break;
+ case PPCII::MO_DTPREL_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_DTPREL_LO;
+ break;
+ case PPCII::MO_TLSLD_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO;
+ break;
+ case PPCII::MO_TOC_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO;
+ break;
+ case PPCII::MO_TLS:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+ break;
+ }
+
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
+ RefKind = MCSymbolRefExpr::VK_PLT;
+
+ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(MO.getOffset(), Ctx),
+ Ctx);
+
+ // Subtract off the PIC base if required.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+
+ const MCExpr *PB = MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::createSub(Expr, PB, Ctx);
+ }
+
+ // Add ha16() / lo16() markers if required.
+ switch (access) {
+ case PPCII::MO_LO:
+ Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx);
+ break;
+ case PPCII::MO_HA:
+ Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx);
+ break;
+ }
+
+ return MCOperand::createExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ assert(MO.getReg() > PPC::NoRegister &&
+ MO.getReg() < PPC::NUM_TARGET_REGS &&
+ "Invalid register for this target!");
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
+ isDarwin);
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
new file mode 100644
index 0000000..fe339d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -0,0 +1,230 @@
+//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs peephole optimizations to clean up ugly code
+// sequences at the MachineInstruction layer. It runs at the end of
+// the SSA phases, following VSX swap removal. A pass of dead code
+// elimination follows this one for quick clean-up of any dead
+// instructions introduced here. Although we could do this as callbacks
+// from the generic peephole pass, this would have a couple of bad
+// effects: it might remove optimization opportunities for VSX swap
+// removal, and it would miss cleanups made possible following VSX
+// swap removal.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-mi-peepholes"
+
+namespace llvm {
+ void initializePPCMIPeepholePass(PassRegistry&);
+}
+
+namespace {
+
+struct PPCMIPeephole : public MachineFunctionPass {
+
+ static char ID;
+ const PPCInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ PPCMIPeephole() : MachineFunctionPass(ID) {
+ initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ // Perform peepholes.
+ bool simplifyCode(void);
+
+ // Find the "true" register represented by SrcReg (following chains
+ // of copies and subreg_to_reg operations).
+ unsigned lookThruCopyLike(unsigned SrcReg);
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ initialize(MF);
+ return simplifyCode();
+ }
+};
+
+// Initialize class variables.
+void PPCMIPeephole::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+ DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
+ DEBUG(MF->dump());
+}
+
+// Perform peephole optimizations.
+bool PPCMIPeephole::simplifyCode(void) {
+ bool Simplified = false;
+ MachineInstr* ToErase = nullptr;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+
+ // If the previous instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ // Ignore debug instructions.
+ if (MI.isDebugValue())
+ continue;
+
+ // Per-opcode peepholes.
+ switch (MI.getOpcode()) {
+
+ default:
+ break;
+
+ case PPC::XXPERMDI: {
+ // Perform simplifications of 2x64 vector swaps and splats.
+ // A swap is identified by an immediate value of 2, and a splat
+ // is identified by an immediate value of 0 or 3.
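+        // xxpermdi vT, vA, vB, DM takes doubleword DM[0] of vA and
+        // doubleword DM[1] of vB, so with identical sources DM = 0 or 3
+        // replicates a single doubleword (splat), DM = 2 exchanges them
+        // (swap), and DM = 1 is a no-op (hence the guard below).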
+ int Immed = MI.getOperand(3).getImm();
+
+ if (Immed != 1) {
+
+ // For each of these simplifications, we need the two source
+ // regs to match. Unfortunately, MachineCSE ignores COPY and
+ // SUBREG_TO_REG, so for example we can see
+ // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+ // We have to look through chains of COPY and SUBREG_TO_REG
+ // to find the real source values for comparison.
+ unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
+ unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
+
+ if (TrueReg1 == TrueReg2
+ && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+ // If this is a splat or a swap fed by another splat, we
+ // can replace it with a copy.
+ if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+ unsigned FeedImmed = DefMI->getOperand(3).getImm();
+ unsigned FeedReg1
+ = lookThruCopyLike(DefMI->getOperand(1).getReg());
+ unsigned FeedReg2
+ = lookThruCopyLike(DefMI->getOperand(2).getReg());
+
+ if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs()
+ << "Optimizing splat/swap or splat/splat "
+ "to splat/copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+
+              // If this is a splat fed by a swap, we can modify the splat to
+              // splat the other value from the swap's input parameter.
+ else if ((Immed == 0 || Immed == 3)
+ && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+ DEBUG(MI.dump());
+ MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
+ MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
+ MI.getOperand(3).setImm(3 - Immed);
+ Simplified = true;
+ }
+
+ // If this is a swap fed by a swap, we can replace it
+ // with a copy from the first swap's input.
+ else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(DefMI->getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // If the last instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+ }
+
+ return Simplified;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg). Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register. If a
+// physical register is encountered, we stop the search.
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
+
+ while (true) {
+
+ MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ return CopySrcReg;
+
+ SrcReg = CopySrcReg;
+ }
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+
+char PPCMIPeephole::ID = 0;
+FunctionPass*
+llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
new file mode 100644
index 0000000..95f1631
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -0,0 +1,25 @@
+//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+void PPCFunctionInfo::anchor() { }
+
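+// Returns the per-function PIC offset symbol, e.g. ".L0$poff" for function
+// number 0 on ELF targets where the private global prefix is ".L".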
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(MF.getFunctionNumber()) +
+ "$poff");
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 0000000..607cdf6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,205 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private PowerPC target-specific information for each
+/// MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// Frame index where the old base pointer is stored.
+ int BasePointerSaveIndex;
+
+ /// Frame index where the old PIC base pointer is stored.
+ int PICBasePointerSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// Does this function have any stack spills.
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms.
+ bool HasNonRISpills;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// Indicates whether VRSAVE is spilled in the current function.
+ bool SpillsVRSAVE;
+
+ /// LRStoreRequired - The bool indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// This function makes use of the PPC64 ELF TOC base pointer (register r2).
+ bool UsesTOCBasePtr;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call. Used to determine
+ /// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
+ bool HasFastCall;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// VarArgsStackOffset - StackOffset for start of stack
+ /// arguments.
+ int VarArgsStackOffset;
+ /// VarArgsNumGPR - Index of the first unused integer
+ /// register for parameter passing.
+ unsigned VarArgsNumGPR;
+ /// VarArgsNumFPR - Index of the first unused double
+ /// register for parameter passing.
+ unsigned VarArgsNumFPR;
+
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
+ /// If any of CR[2-4] need to be saved in the prologue and restored in the
+ /// epilogue then they are added to this array. This is used for the
+ /// 64-bit SVR4 ABI.
+ SmallVector<unsigned, 3> MustSaveCRs;
+
+ /// Hold onto our MachineFunction context.
+ MachineFunction &MF;
+
+ /// Whether this uses the PIC Base register or not.
+ bool UsesPICBase;
+
+public:
+ explicit PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ BasePointerSaveIndex(0),
+ PICBasePointerSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
+ SpillsCR(false),
+ SpillsVRSAVE(false),
+ LRStoreRequired(false),
+ UsesTOCBasePtr(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false),
+ VarArgsFrameIndex(0),
+ VarArgsStackOffset(0),
+ VarArgsNumGPR(0),
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0),
+ MF(MF),
+ UsesPICBase(0) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ int getBasePointerSaveIndex() const { return BasePointerSaveIndex; }
+ void setBasePointerSaveIndex(int Idx) { BasePointerSaveIndex = Idx; }
+
+ int getPICBasePointerSaveIndex() const { return PICBasePointerSaveIndex; }
+ void setPICBasePointerSaveIndex(int Idx) { PICBasePointerSaveIndex = Idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setSpillsVRSAVE() { SpillsVRSAVE = true; }
+ bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setUsesTOCBasePtr() { UsesTOCBasePtr = true; }
+ bool usesTOCBasePtr() const { return UsesTOCBasePtr; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall;}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
+ void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+
+ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
+ void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
+
+ const SmallVectorImpl<unsigned> &
+ getMustSaveCRs() const { return MustSaveCRs; }
+ void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+
+ void setUsesPICBase(bool uses) { UsesPICBase = uses; }
+ bool usesPICBase() const { return UsesPICBase; }
+
+ MCSymbol *getPICOffsetSymbol() const;
+};
+
+} // end of namespace llvm
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 0000000..8a1d680
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6591 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 0000000..934bdf6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,1041 @@
+//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterInfo.h"
+#include "PPC.h"
+#include "PPCFrameLowering.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "reginfo"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
+static cl::opt<bool>
+EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+static cl::opt<bool>
+AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
+ cl::desc("Force the use of a base pointer in every function"));
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
+ : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
+ TM.isPPC64() ? 0 : 1,
+ TM.isPPC64() ? 0 : 1),
+ TM(TM) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+ ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
+ // when it checks for ZERO folding.
+ if (Kind == 1) {
+ if (TM.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
+ if (TM.isPPC64())
+ return &PPC::G8RCRegClass;
+ return &PPC::GPRCRegClass;
+}
+
+const MCPhysReg*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
+ if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
+ if (Subtarget.hasVSX())
+ return CSR_64_AllRegs_VSX_SaveList;
+ if (Subtarget.hasAltivec())
+ return CSR_64_AllRegs_Altivec_SaveList;
+ return CSR_64_AllRegs_SaveList;
+ }
+
+ if (Subtarget.isDarwinABI())
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
+ : CSR_Darwin64_SaveList)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
+ : CSR_Darwin32_SaveList);
+
+ // On PPC64, we might need to save r2 (but only if it is not reserved).
+ bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec()
+ ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList)
+ : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
+ : CSR_SVR432_SaveList);
+}
+
+const uint32_t *
+PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ if (CC == CallingConv::AnyReg) {
+ if (Subtarget.hasVSX())
+ return CSR_64_AllRegs_VSX_RegMask;
+ if (Subtarget.hasAltivec())
+ return CSR_64_AllRegs_Altivec_RegMask;
+ return CSR_64_AllRegs_RegMask;
+ }
+
+ if (Subtarget.isDarwinABI())
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
+ : CSR_Darwin64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
+ : CSR_Darwin32_RegMask);
+
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
+ : CSR_SVR464_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
+ : CSR_SVR432_RegMask);
+}
+
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
+ for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM})
+ Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32));
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
+ // The BP register is also not really a register, but is the representation
+ // of the base pointer register used by setjmp.
+ Reserved.set(PPC::BP);
+ Reserved.set(PPC::BP8);
+
+ // The counter registers must be reserved so that counter-based loops can
+ // be correctly formed (and the mtctr instructions are not DCE'd).
+ Reserved.set(PPC::CTR);
+ Reserved.set(PPC::CTR8);
+
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
+ Reserved.set(PPC::VRSAVE);
+
+ // The SVR4 ABI reserves r2 and r13
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::R2); // System-reserved register
+ Reserved.set(PPC::R13); // Small Data Area pointer register
+ }
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ if (TM.isPPC64()) {
+ Reserved.set(PPC::R13);
+
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+
+ if (TFI->needsFP(MF))
+ Reserved.set(PPC::X31);
+
+ if (hasBasePointer(MF))
+ Reserved.set(PPC::X30);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ // We only reserve r2 if we need to use the TOC pointer. If we have no
+ // explicit uses of the TOC pointer (meaning we're a leaf function with
+ // no constant-pool loads, etc.) and we have no potential uses inside an
+ // inline asm block, then we can treat r2 as an ordinary callee-saved
+ // register.
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ if (FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
+ Reserved.set(PPC::X2);
+ else
+ Reserved.reset(PPC::R2);
+ }
+ }
+
+ if (TFI->needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ if (hasBasePointer(MF)) {
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ Reserved.set(PPC::R29);
+ else
+ Reserved.set(PPC::R30);
+ }
+
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ Reserved.set(PPC::R30);
+
+ // Reserve Altivec registers when Altivec is unavailable.
+ if (!Subtarget.hasAltivec())
+ for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(),
+ IE = PPC::VRRCRegClass.end(); I != IE; ++I)
+ Reserved.set(*I);
+
+ return Reserved;
+}
+
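+// A rough per-class register count used when estimating register pressure.
+// For example, for GPRC in a function that needs a frame pointer, the value
+// computed below is 32 - 1 (frame pointer) - 1 (safety margin) = 30.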
+unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+ const unsigned DefaultSafety = 1;
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 32 - FP - DefaultSafety;
+ }
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::QFRCRegClassID:
+ case PPC::QSRCRegClassID:
+ case PPC::QBRCRegClassID:
+ case PPC::VRRCRegClassID:
+ case PPC::VFRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSHRCRegClassID:
+ return 32 - DefaultSafety;
+ case PPC::VSRCRegClassID:
+ case PPC::VSFRCRegClassID:
+ case PPC::VSSRCRegClassID:
+ return 64 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
+ }
+}
+
+const TargetRegisterClass *
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ if (Subtarget.hasVSX()) {
+ // With VSX, we can inflate various sub-register classes to the full VSX
+ // register set.
+
+ if (RC == &PPC::F8RCRegClass)
+ return &PPC::VSFRCRegClass;
+ else if (RC == &PPC::VRRCRegClass)
+ return &PPC::VSRCRegClass;
+ else if (RC == &PPC::F4RCRegClass && Subtarget.hasP8Vector())
+ return &PPC::VSSRCRegClass;
+ }
+
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code will be in the general form
+///
+/// addi R0, SP, \#frameSize ; get the address of the previous frame
+/// stwux R0, SP, Rnegsize ; add and update the SP with the negated size
+/// addi Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
+///
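+///
+/// On 64-bit targets the analogous ld / stdux / addi sequence is emitted, and
+/// when the required alignment exceeds the target's stack alignment the
+/// negated size is first aligned with an li/and pair (see below).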
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = TM.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+ unsigned TargetAlign = TFI->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
+ "Maximum call-frame size not sufficiently aligned");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ bool KillNegSizeReg = MI.getOperand(1).isKill();
+ unsigned NegSizeReg = MI.getOperand(1).getReg();
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ if (MaxAlign > TargetAlign) {
+ unsigned UnalNegSizeReg = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
+
+ // Unfortunately, there is no andi, only andi., and we can't insert that
+ // here because we might clobber cr0 while it is live.
+ BuildMI(MBB, II, dl, TII.get(PPC::LI8), NegSizeReg)
+ .addImm(~(MaxAlign-1));
+
+ unsigned NegSizeReg1 = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
+ BuildMI(MBB, II, dl, TII.get(PPC::AND8), NegSizeReg)
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
+ KillNegSizeReg = true;
+ }
+
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ } else {
+ if (MaxAlign > TargetAlign) {
+ unsigned UnalNegSizeReg = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
+
+ // Unfortunately, there is no andi, only andi., and we can't insert that
+ // here because we might clobber cr0 while it is live.
+ BuildMI(MBB, II, dl, TII.get(PPC::LI), NegSizeReg)
+ .addImm(~(MaxAlign-1));
+
+ unsigned NegSizeReg1 = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
+ BuildMI(MBB, II, dl, TII.get(PPC::AND), NegSizeReg)
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
+ KillNegSizeReg = true;
+ }
+
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerDynamicAreaOffset(
+ MachineBasicBlock::iterator II) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ DebugLoc dl = MI.getDebugLoc();
+ BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg())
+ .addImm(maxCallFrameSize);
+ MBB.erase(II);
+}
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
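+/// For example, if SrcReg is CR2 (encoding 2), the shift amount SB is
+/// 2 * 4 = 8, which moves CR2's four bits into CR0's slot before the store.
+///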
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = TM.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ // We need to store the CR in the low 4-bits of the saved value. First, issue
+ // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ if (SrcReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+ }
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = TM.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ unsigned ShiftBits = getEncodingValue(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), DestReg)
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CRBIT <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = TM.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
+ getCRFromCRBit(SrcReg))
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ .addReg(getCRFromCRBit(SrcReg));
+
+ // Shift the bits left so that the bit to store is the first one (for CR0LT
+ // the shift amount is zero), and mask all but that bit.
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ // rlwinm rA, rA, ShiftBits, 0, 0.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg))
+ .addImm(0).addImm(0);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CRBIT <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = TM.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CRBIT does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg);
+
+ unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO)
+ .addReg(getCRFromCRBit(DestReg));
+
+ unsigned ShiftBits = getEncodingValue(DestReg);
+ // rlwimi r11, r10, 32-ShiftBits, ..., ...
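+ // For example, if DestReg is CR2EQ (bit encoding 10), this is
+ // rlwimi r11, r10, 22, 10, 10: rotate the reloaded bit from position 0
+ // into position 10 and insert only that single bit.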
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO)
+ .addReg(RegO, RegState::Kill)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ShiftBits ? 32 - ShiftBits : 0)
+ .addImm(ShiftBits)
+ .addImm(ShiftBits);
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF),
+ getCRFromCRBit(DestReg))
+ .addReg(RegO, RegState::Kill)
+ // Make sure we have a use dependency all the way through this
+ // sequence of instructions. We can't have the other bits in the CR
+ // modified in between the mfocrf and the mtocrf.
+ .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(
+ BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
+ // ABI, return true to prevent allocating an additional frame slot.
+ // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
+ if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
+ if (TM.isPPC64())
+ FrameIdx = 0;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
+ }
+ return true;
+ }
+ return false;
+}
+
+// Figure out if the offset in the instruction must be a multiple of 4.
+// This is true for instructions like "STD".
+static bool usesIXAddr(const MachineInstr &MI) {
+ unsigned OpC = MI.getOpcode();
+
+ switch (OpC) {
+ default:
+ return false;
+ case PPC::LWA:
+ case PPC::LWA_32:
+ case PPC::LD:
+ case PPC::STD:
+ return true;
+ }
+}
+
+// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
+static unsigned getOffsetONFromFION(const MachineInstr &MI,
+ unsigned FIOperandNum) {
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
+ if (MI.isInlineAsm())
+ OffsetOperandNo = FIOperandNum - 1;
+ else if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ OffsetOperandNo = FIOperandNum + 1;
+
+ return OffsetOperandNo;
+}
+
+void
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ if ((OpC == PPC::DYNAREAOFFSET || OpC == PPC::DYNAREAOFFSET8)) {
+ lowerDynamicAreaOffset(II);
+ return;
+ }
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II);
+ return;
+ }
+
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_CRBIT) {
+ lowerCRBitSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CRBIT) {
+ lowerCRBitRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex);
+ return;
+ }
+
+ // Replace the FrameIndex operand with the base register (for fixed objects,
+ // i.e. negative frame indices) or with the frame register (R1/SP or R31/FP).
+ MI.getOperand(FIOperandNum).ChangeToRegister(
+ FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
+
+ // Figure out if the offset in the instruction is shifted right two bits.
+ bool isIXAddr = usesIXAddr(MI);
+
+ // If the instruction is not present in ImmToIdxMap, then it has no immediate
+ // form (and must be r+r).
+ bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
+ OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC);
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ // Naked functions have stack size 0, although getStackSize may not reflect
+ // that because we didn't call all the pieces that compute it for naked
+ // functions.
+ if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
+ if (!(hasBasePointer(MF) && FrameIndex < 0))
+ Offset += MFI->getStackSize();
+ }
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
+ assert(OpC != PPC::DBG_VALUE &&
+ "This should be handled in a target-independent way");
+ if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
+ OpC == TargetOpcode::STACKMAP ||
+ OpC == TargetOpcode::PATCHPOINT)) {
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into the instruction's immediate field, so scavenge
+ // a register and build the offset in it.
+
+ bool is64Bit = TM.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+ unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ SReg = MF.getRegInfo().createVirtualRegister(RC);
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
+ .addReg(SRegHi, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+ // sth 0:rA, 1:imm, 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA, 1:rB, 2:imm ==> add 0:rA, 1:rB, 2:r0
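+ //
+ // For example (with an illustrative scratch register; the code below uses
+ // virtual registers), "stw r4, 70000(r1)" becomes:
+ //   lis  r0, 1          ; 70000 = (1 << 16) + 4464
+ //   ori  r0, r0, 4464
+ //   stwx r4, r1, r0     ; STW -> STWX via ImmToIdxMap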
+ unsigned OperandBase;
+
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+
+ if (!TM.isPPC64())
+ return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ if (!hasBasePointer(MF))
+ return getFrameRegister(MF);
+
+ if (TM.isPPC64())
+ return PPC::X30;
+
+ if (Subtarget.isSVR4ABI() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ return PPC::R29;
+
+ return PPC::R30;
+}
+
+bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ if (!EnableBasePointer)
+ return false;
+ if (AlwaysBasePointer)
+ return true;
+
+ // If we need to realign the stack, then the stack pointer can no longer
+ // serve as an offset into the caller's stack space. As a result, we need a
+ // base pointer.
+ return needsStackRealignment(MF);
+}
+
+/// Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool PPCRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ assert(Offset < 0 && "Local offset must be negative");
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores that have
+ // an r+i form. Return false for everything else.
+ unsigned OpC = MI->getOpcode();
+ if (!ImmToIdxMap.count(OpC))
+ return false;
+
+ // Don't generate a new virtual base register just to add zero to it.
+ if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) &&
+ MI->getOperand(2).getImm() == 0)
+ return false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCFrameLowering *TFI = getFrameLowering(MF);
+ unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
+
+ // If we likely don't need a stack frame, then we probably don't need a
+ // virtual base register either.
+ if (!StackEst)
+ return false;
+
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relative to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset += StackEst;
+
+ // The frame pointer will point to the end of the stack, so estimate the
+ // offset as the difference between the object offset and the FP location.
+ return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset);
+}
+
+/// Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void PPCRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MachineFunction &MF = *MBB->getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+
+ BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+}
+
+void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const {
+ unsigned FIOperandNum = 0;
+ while (!MI.getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+ unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const MCInstrDesc &MCID = MI.getDesc();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.constrainRegClass(BaseReg,
+ TII.getRegClass(MCID, FIOperandNum, this, MF));
+}
+
+bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
+ int64_t Offset) const {
+ unsigned FIOperandNum = 0;
+ while (!MI->getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+ Offset += MI->getOperand(OffsetOperandNo).getImm();
+
+ return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+ MI->getOpcode() == TargetOpcode::STACKMAP ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 0000000..b15fde8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,144 @@
+//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
+
+#include "PPC.h"
+#include "llvm/ADT/DenseMap.h"
+
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
+namespace llvm {
+
+inline static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
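+
+// For example, getCRFromCRBit(PPC::CR3EQ) returns PPC::CR3; the CR-bit spill
+// and restore lowerings use this to pick the CR field to move via
+// mfocrf/mtocrf.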
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ DenseMap<unsigned, unsigned> ImmToIdxMap;
+ const PPCTargetMachine &TM;
+
+public:
+ PPCRegisterInfo(const PPCTargetMachine &TM);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const override;
+
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
+
+ /// Code Generation virtual methods...
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
+ const uint32_t *getNoPreservedMask() const override;
+
+ void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+
+ // Support for virtual base registers.
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const override;
+ void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const override;
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ int64_t Offset) const override;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+
+ // Base pointer (stack realignment) support.
+ unsigned getBaseRegister(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 0000000..e5f363c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,363 @@
+//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex<1>;
+def sub_gt : SubRegIndex<1, 1>;
+def sub_eq : SubRegIndex<1, 2>;
+def sub_un : SubRegIndex<1, 3>;
+def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
+def sub_128 : SubRegIndex<128>;
+}
+
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ let HWEncoding{9-0} = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
+class QFPR<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VF - One of the 32 64-bit floating-point subregisters of the vector
+// registers (used by VSX).
+class VF<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+ let HWEncoding{5} = 1;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<VF SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 0;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
+// floating-point registers.
+class VSRL<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
+// registers.
+class VSRH<VR SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 1;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_128];
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// General-purpose registers
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
+
+// 64-bit General-purpose registers
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
+
+// Floating-point registers
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
+// Floating-point vector subregisters (for VSX)
+foreach Index = 0-31 in {
+ def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+}
+
+// QPX Floating-point registers
+foreach Index = 0-31 in {
+ def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
+// Vector registers
+foreach Index = 0-31 in {
+ def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// VSX registers
+foreach Index = 0-31 in {
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+foreach Index = 0-31 in {
+ def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
+ DwarfRegAlias<!cast<VR>("V"#Index)>;
+}
+
+// The representation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "0">, DwarfRegAlias<R0>;
+def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
+
+// Representations of the base pointer used by setjmp.
+def BP : GPR<0 /* arbitrary */, "**BASE POINTER**">;
+def BP8 : GP8<BP, "**BASE POINTER**">;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">;
+def CR0GT : CRBIT< 1, "1">;
+def CR0EQ : CRBIT< 2, "2">;
+def CR0UN : CRBIT< 3, "3">;
+def CR1LT : CRBIT< 4, "4">;
+def CR1GT : CRBIT< 5, "5">;
+def CR1EQ : CRBIT< 6, "6">;
+def CR1UN : CRBIT< 7, "7">;
+def CR2LT : CRBIT< 8, "8">;
+def CR2GT : CRBIT< 9, "9">;
+def CR2EQ : CRBIT<10, "10">;
+def CR2UN : CRBIT<11, "11">;
+def CR3LT : CRBIT<12, "12">;
+def CR3GT : CRBIT<13, "13">;
+def CR3EQ : CRBIT<14, "14">;
+def CR3UN : CRBIT<15, "15">;
+def CR4LT : CRBIT<16, "16">;
+def CR4GT : CRBIT<17, "17">;
+def CR4EQ : CRBIT<18, "18">;
+def CR4UN : CRBIT<19, "19">;
+def CR5LT : CRBIT<20, "20">;
+def CR5GT : CRBIT<21, "21">;
+def CR5EQ : CRBIT<22, "22">;
+def CR5UN : CRBIT<23, "23">;
+def CR6LT : CRBIT<24, "24">;
+def CR6GT : CRBIT<25, "25">;
+def CR6EQ : CRBIT<26, "26">;
+def CR6UN : CRBIT<27, "27">;
+def CR7LT : CRBIT<28, "28">;
+def CR7GT : CRBIT<29, "29">;
+def CR7EQ : CRBIT<30, "30">;
+def CR7UN : CRBIT<31, "31">;
+
+// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
+}
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
+
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which really is SPR register 1); this is the only bit interesting to a
+// compiler.
+def CARRY: SPR<1, "ca">, DwarfRegNum<[76]>;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register.
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with
+// most registers; it has to be done in code. To make this work, all the
+// return and call instructions are described as Uses of RM, so instructions
+// that do nothing but change RM will not get deleted.
+def RM: PPCReg<"**ROUNDING MODE**">;
+
+/// Register classes
+// Allocate volatiles first
+// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, FP, BP)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub GPRC, R2), R2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
+
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+ (sequence "X%u", 30, 14),
+ X31, X13, X0, X1, FP8, BP8)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub G8RC, X2), X2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub GPRC_NOR0, R2), R2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
+
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> {
+ // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+ // put it at the end of the list.
+ let AltOrders = [(add (sub G8RC_NOX0, X2), X2)];
+ let AltOrderSelect = [{
+ const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+ return S.isPPC64() && S.isSVR4ABI();
+ }];
+}
+
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+ (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
+ (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+ V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+ V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add (sequence "VSL%u", 0, 13),
+ (sequence "VSL%u", 31, 14))>;
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
+ VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
+ VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
+ VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
+ VSH22, VSH21, VSH20)>;
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSLRC, VSHRC)>;
+
+// Register classes for the 64-bit "scalar" VSX subregisters.
+def VFRC : RegisterClass<"PPC", [f64], 64,
+ (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+ VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+ VF15, VF16, VF17, VF18, VF19, VF31, VF30,
+ VF29, VF28, VF27, VF26, VF25, VF24, VF23,
+ VF22, VF21, VF20)>;
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+
+// Register class for single precision scalars in VSX registers
+def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
+
+// For QPX
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+ (sequence "QF%u", 31, 14))>;
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+ // These are actually stored as floating-point values where a positive
+ // number is true and anything else (including NaN) is false.
+ let Size = 256;
+}
+
+def CRBITRC : RegisterClass<"PPC", [i1], 32,
+ (add CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR0LT, CR0GT, CR0EQ, CR0UN)> {
+ let Size = 32;
+}
+
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)>;
+
+def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
+
+// The CTR registers are not allocatable because they're used by the
+// decrement-and-branch instructions, and thus need to stay live across
+// multiple basic blocks.
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> {
+ let isAllocatable = 0;
+}
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
+ let isAllocatable = 0;
+}
+
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 0000000..d0954a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,126 @@
+//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IIC_IntSimple : InstrItinClass;
+def IIC_IntGeneral : InstrItinClass;
+def IIC_IntCompare : InstrItinClass;
+def IIC_IntISEL : InstrItinClass;
+def IIC_IntDivD : InstrItinClass;
+def IIC_IntDivW : InstrItinClass;
+def IIC_IntMFFS : InstrItinClass;
+def IIC_IntMFVSCR : InstrItinClass;
+def IIC_IntMTFSB0 : InstrItinClass;
+def IIC_IntMTSRD : InstrItinClass;
+def IIC_IntMulHD : InstrItinClass;
+def IIC_IntMulHW : InstrItinClass;
+def IIC_IntMulHWU : InstrItinClass;
+def IIC_IntMulLI : InstrItinClass;
+def IIC_IntRFID : InstrItinClass;
+def IIC_IntRotateD : InstrItinClass;
+def IIC_IntRotateDI : InstrItinClass;
+def IIC_IntRotate : InstrItinClass;
+def IIC_IntShift : InstrItinClass;
+def IIC_IntTrapD : InstrItinClass;
+def IIC_IntTrapW : InstrItinClass;
+def IIC_BrB : InstrItinClass;
+def IIC_BrCR : InstrItinClass;
+def IIC_BrMCR : InstrItinClass;
+def IIC_BrMCRX : InstrItinClass;
+def IIC_LdStDCBA : InstrItinClass;
+def IIC_LdStDCBF : InstrItinClass;
+def IIC_LdStDCBI : InstrItinClass;
+def IIC_LdStLoad : InstrItinClass;
+def IIC_LdStLoadUpd : InstrItinClass;
+def IIC_LdStLoadUpdX : InstrItinClass;
+def IIC_LdStStore : InstrItinClass;
+def IIC_LdStStoreUpd : InstrItinClass;
+def IIC_LdStDSS : InstrItinClass;
+def IIC_LdStICBI : InstrItinClass;
+def IIC_LdStLD : InstrItinClass;
+def IIC_LdStLDU : InstrItinClass;
+def IIC_LdStLDUX : InstrItinClass;
+def IIC_LdStLDARX : InstrItinClass;
+def IIC_LdStLFD : InstrItinClass;
+def IIC_LdStLFDU : InstrItinClass;
+def IIC_LdStLFDUX : InstrItinClass;
+def IIC_LdStLHA : InstrItinClass;
+def IIC_LdStLHAU : InstrItinClass;
+def IIC_LdStLHAUX : InstrItinClass;
+def IIC_LdStLMW : InstrItinClass;
+def IIC_LdStLVecX : InstrItinClass;
+def IIC_LdStLWA : InstrItinClass;
+def IIC_LdStLWARX : InstrItinClass;
+def IIC_LdStSLBIA : InstrItinClass;
+def IIC_LdStSLBIE : InstrItinClass;
+def IIC_LdStSTD : InstrItinClass;
+def IIC_LdStSTDCX : InstrItinClass;
+def IIC_LdStSTDU : InstrItinClass;
+def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTFD : InstrItinClass;
+def IIC_LdStSTFDU : InstrItinClass;
+def IIC_LdStSTVEBX : InstrItinClass;
+def IIC_LdStSTWCX : InstrItinClass;
+def IIC_LdStSync : InstrItinClass;
+def IIC_SprISYNC : InstrItinClass;
+def IIC_SprMFSR : InstrItinClass;
+def IIC_SprMTMSR : InstrItinClass;
+def IIC_SprMTSR : InstrItinClass;
+def IIC_SprTLBSYNC : InstrItinClass;
+def IIC_SprMFCR : InstrItinClass;
+def IIC_SprMFCRF : InstrItinClass;
+def IIC_SprMFMSR : InstrItinClass;
+def IIC_SprMFSPR : InstrItinClass;
+def IIC_SprMFTB : InstrItinClass;
+def IIC_SprMTSPR : InstrItinClass;
+def IIC_SprMTSRIN : InstrItinClass;
+def IIC_SprRFI : InstrItinClass;
+def IIC_SprSC : InstrItinClass;
+def IIC_FPGeneral : InstrItinClass;
+def IIC_FPAddSub : InstrItinClass;
+def IIC_FPCompare : InstrItinClass;
+def IIC_FPDivD : InstrItinClass;
+def IIC_FPDivS : InstrItinClass;
+def IIC_FPFused : InstrItinClass;
+def IIC_FPRes : InstrItinClass;
+def IIC_FPSqrtD : InstrItinClass;
+def IIC_FPSqrtS : InstrItinClass;
+def IIC_VecGeneral : InstrItinClass;
+def IIC_VecFP : InstrItinClass;
+def IIC_VecFPCompare : InstrItinClass;
+def IIC_VecComplex : InstrItinClass;
+def IIC_VecPerm : InstrItinClass;
+def IIC_VecFPRound : InstrItinClass;
+def IIC_VecVSL : InstrItinClass;
+def IIC_VecVSR : InstrItinClass;
+def IIC_SprMTMSRD : InstrItinClass;
+def IIC_SprSLIE : InstrItinClass;
+def IIC_SprSLBIE : InstrItinClass;
+def IIC_SprSLBMTE : InstrItinClass;
+def IIC_SprSLBMFEE : InstrItinClass;
+def IIC_SprSLBIA : InstrItinClass;
+def IIC_SprTLBIA : InstrItinClass;
+def IIC_SprTLBIEL : InstrItinClass;
+def IIC_SprTLBIE : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCSchedule440.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+include "PPCScheduleP7.td"
+include "PPCScheduleP8.td"
+include "PPCScheduleA2.td"
+include "PPCScheduleE500mc.td"
+include "PPCScheduleE5500.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
new file mode 100644
index 0000000..04a43bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -0,0 +1,607 @@
+//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// PowerPC 440x6 Embedded Processor Core User's Manual.
+// IBM (as updated in) 2010.
+
+// The basic PPC 440 does not include a floating-point unit; the pipeline
+// timings here are constructed to match the FP2 unit shipped with the
+// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
+// References:
+// S. Chatterjee, et al. Design and exploitation of a high-performance
+// SIMD floating-point unit for Blue Gene/L.
+// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
+// also:
+// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
+// Blue Gene/P Application Development.
+// IBM (as updated in) 2009.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def P440_DISS1 : FuncUnit; // Issue unit 1
+def P440_DISS2 : FuncUnit; // Issue unit 2
+def P440_LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def P440_IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def P440_FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def P440_IWB : FuncUnit; // Write-back unit for the I pipeline
+def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def P440_JWB : FuncUnit; // Write-back unit for the J pipeline
+def P440_AGEN : FuncUnit; // Address generation for the L pipeline
+def P440_CRD : FuncUnit; // D-cache access for the L pipeline
+def P440_LWB : FuncUnit; // Write-back unit for the L pipeline
+def P440_FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def P440_FWB : FuncUnit; // Write-back unit for the F pipeline
+
+def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
+
+def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+// - Store instructions will hold in the AGEN stage
+// - The integer multiply-accumulate instruction will hold in
+// the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from the first dispatch slot (DISS1).
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
+
+//
+// This file defines the itinerary class data for the PPC 440 processor.
+//
+//===----------------------------------------------------------------------===//
+
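+// How to read the entries below (a brief sketch; the records that follow
+// are authoritative): each InstrItinData names an itinerary class, the
+// pipeline stages it occupies, the per-operand cycles, and the per-operand
+// bypasses. For example, the IIC_IntDivW record defined below,
+//   InstrItinData<IIC_IntDivW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+//                               InstrStage<1, [P440_IRACC]>,
+//                               InstrStage<1, [P440_IEXE1]>,
+//                               InstrStage<1, [P440_IEXE2]>,
+//                               InstrStage<33, [P440_IWB]>],
+//                 [36, 0, 0],
+//                 [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>
+// reserves a dispatch slot, walks the I pipe, and keeps IWB busy for the
+// 33 cycles a divide resides there (see the notes above). The operand-cycle
+// list says the result is defined at cycle 36 and both sources are read at
+// cycle 0; the bypass list names the forwarding network (or NoBypass) used
+// for each of those operands.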
+
+def PPC440Itineraries : ProcessorItineraries<
+ [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2,
+ P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD,
+ P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5,
+ P440_FEXE6, P440_FWB, P440_LWARX_Hold],
+ [P440_GPR_Bypass, P440_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass, NoBypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<33, [P440_IWB]>],
+ [36, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<3, [P440_AGEN], 1>,
+ InstrStage<2, [P440_CRD], 1>,
+ InstrStage<1, [P440_LWB]>]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC], 0>,
+ InstrStage<1, [P440_LRACC], 0>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_FEXE1], 0>,
+ InstrStage<1, [P440_AGEN], 0>,
+ InstrStage<1, [P440_JEXE1], 0>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_FEXE2], 0>,
+ InstrStage<1, [P440_CRD], 0>,
+ InstrStage<1, [P440_JEXE2], 0>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<6, [P440_FEXE3], 0>,
+ InstrStage<6, [P440_LWB], 0>,
+ InstrStage<6, [P440_JWB], 0>,
+ InstrStage<6, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [5, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<25, [P440_FWB]>],
+ [31, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<13, [P440_FWB]>],
+ [19, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// PPC440 machine model for scheduling and other instruction cost heuristics.
+
+def PPC440Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 instructions are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPC440Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
new file mode 100644
index 0000000..21a357a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -0,0 +1,171 @@
+//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// A2 Processor User's Manual.
+// IBM (as updated in 2010).
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC A2 chip sets
+//
+def A2_XU : FuncUnit; // A2_XU pipeline
+def A2_FU : FuncUnit; // FP pipeline
+
+//
+// This file defines the itinerary class data for the PPC A2 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPCA2Itineraries : ProcessorItineraries<
+ [A2_XU, A2_FU], [], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [A2_XU]>],
+ [39, 0, 0]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [A2_XU]>],
+ [71, 0, 0]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_IntTrapD, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
+ [16, 0, 0]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [A2_XU]>],
+ [7, 0, 0]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [A2_XU]>],
+ [6]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [A2_XU]>],
+ [16, 0]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [A2_XU]>],
+ [1, 0]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [A2_FU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [A2_FU]>],
+ [72, 0, 0]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [A2_FU]>],
+ [59, 0, 0]>,
+ InstrItinData<IIC_FPSqrtD, [InstrStage<1, [A2_FU]>],
+ [69, 0, 0]>,
+ InstrItinData<IIC_FPSqrtS, [InstrStage<1, [A2_FU]>],
+ [65, 0, 0]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0, 0]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [A2_FU]>],
+ [6, 0]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// A2 machine model for scheduling and other instruction cost heuristics.
+
+def PPCA2Model : SchedMachineModel {
+ let IssueWidth = 1; // 1 instruction is dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 13;
+
+ let Itineraries = PPCA2Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
new file mode 100644
index 0000000..36b8517
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -0,0 +1,320 @@
+//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e500mc 32-bit
+// Power processor.
+//
+// All information is derived from the "e500mc Core Reference Manual",
+// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e500mc core:
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// Some instructions can execute only in SFX0, not in SFX1.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed (sketched just before the
+// itineraries below).
+def E500_SFX0 : FuncUnit; // Simple unit 0
+def E500_SFX1 : FuncUnit; // Simple unit 1
+def E500_BU : FuncUnit; // Branch unit
+def E500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E500_CFX_0 : FuncUnit; // CFX pipeline
+def E500_LSU_0 : FuncUnit; // LSU pipeline
+def E500_FPU_0 : FuncUnit; // FPU pipeline
+
+def E500_GPR_Bypass : Bypass;
+def E500_FPR_Bypass : Bypass;
+def E500_CR_Bypass : Bypass;
+
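+// A brief sketch of how the CFX divide bypass described above shows up in
+// the itineraries (the IIC_IntDivW record below is the authoritative form):
+//   InstrStage<1,  [E500_CFX_0], 0>,      // one cycle on the shared CFX
+//                                         // stage; the next stage starts in
+//                                         // the same cycle (TimeInc = 0)
+//   InstrStage<14, [E500_CFX_DivBypass]>, // the divide then completes on
+//                                         // the dedicated bypass unit
+// so E500_CFX_0 is held for only a single cycle and later non-divide CFX
+// instructions (the multiplies, for example) can be issued while the divide
+// is still in flight.
+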
+def PPCE500mcItineraries : ProcessorItineraries<
+ [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass,
+ E500_CFX_0, E500_LSU_0, E500_FPU_0],
+ [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass,
+ E500_CR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [5, 1, 1], // Latency = 1 or 2
+ [E500_CR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0], 0>,
+ InstrStage<14, [E500_CFX_DivBypass]>],
+ [17, 1, 1], // Latency=4..35, Repeat= 4..35
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11], // Latency = 8
+ [E500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0]>],
+ [5, 1], // Latency = 2, Repeat rate = 2
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass,
+ E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3, Repeat rate = 1
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1], // Latency = r+3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<3, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3, Repeat rate = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1],
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0, E500_SFX1]>],
+ [5, 1], // Latency = 2, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [5, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [4, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_CR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<68, [E500_FPU_0]>],
+ [71, 1, 1], // Latency = 68, Repeat rate = 68
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass,
+ E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass, E500_FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e500mc machine model for scheduling and other instruction cost heuristics.
+
+def PPCE500mcModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE500mcItineraries;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
new file mode 100644
index 0000000..7c2693e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -0,0 +1,380 @@
+//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e5500 64-bit
+// Power processor.
+//
+// All information is derived from the "e5500 Core Reference Manual",
+// Freescale Document Number e5500RM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e5500 core
+// (These are the same as for the e500mc)
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def E5500_DIS0 : FuncUnit;
+def E5500_DIS1 : FuncUnit;
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed.
+def E5500_SFX0 : FuncUnit; // Simple unit 0
+def E5500_SFX1 : FuncUnit; // Simple unit 1
+def E5500_BU : FuncUnit; // Branch unit
+def E5500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0
+
+def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1
+
+def E5500_LSU_0 : FuncUnit; // LSU pipeline
+def E5500_FPU_0 : FuncUnit; // FPU pipeline
+
+def E5500_GPR_Bypass : Bypass;
+def E5500_FPR_Bypass : Bypass;
+def E5500_CR_Bypass : Bypass;
+
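+// Many entries below end in a trailing "2" commented "2 micro-ops": that
+// optional final InstrItinData operand is the micro-op count for the class,
+// e.g. (abbreviating the IIC_LdStLoadUpd record defined below)
+//   InstrItinData<IIC_LdStLoadUpd, [ ...stages... ], [7, 2],
+//                 [E5500_GPR_Bypass, E5500_GPR_Bypass], 2>
+// With IssueWidth = 2 in the machine model at the end of this file, such a
+// record counts as two micro-ops against the two-per-cycle dispatch limit
+// and therefore fills a dispatch cycle by itself.
+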
+def PPCE5500Itineraries : ProcessorItineraries<
+ [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU,
+ E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1,
+ E5500_LSU_0, E5500_FPU_0],
+ [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass,
+ E5500_CR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 1 or 2
+ [E5500_CR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<26, [E5500_CFX_DivBypass]>],
+ [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<16, [E5500_CFX_DivBypass]>],
+ [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<7, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 7
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4 or 5, Repeat = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0]>],
+ [6, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass,
+ E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_LSU_0]>],
+ [8, 2], // Latency = r+3, Repeat rate = r+3
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_CFX_0]>],
+ [6, 2], // Latency = 2, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_SFX0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_CFX_0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_CR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<31, [E5500_FPU_0]>],
+ [39, 2, 2], // Latency = 35, Repeat rate = 31
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<16, [E5500_FPU_0]>],
+ [24, 2, 2], // Latency = 20, Repeat rate = 16
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass,
+ E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_FPU_0]>],
+ [12, 2], // Latency = 8, Repeat rate = 2
+ [E5500_FPR_Bypass, E5500_FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def PPCE5500Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE5500Itineraries;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 0000000..21efd8f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,80 @@
+//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G3_BPU : FuncUnit; // Branch unit
+def G3_SLU : FuncUnit; // Store/load unit
+def G3_SRU : FuncUnit; // special register unit
+def G3_IU1 : FuncUnit; // integer unit 1 (simple)
+def G3_IU2 : FuncUnit; // integer unit 2 (complex)
+def G3_FPU1 : FuncUnit; // floating point unit 1
+
+def G3Itineraries : ProcessorItineraries<
+ [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, G3_SRU, G3_SLU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G3_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G3_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_LdStDCBA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<8, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<2, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G3_FPU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 0000000..340773e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,96 @@
+//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4_BPU : FuncUnit; // Branch unit
+def G4_SLU : FuncUnit; // Store/load unit
+def G4_SRU : FuncUnit; // special register unit
+def G4_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4_FPU1 : FuncUnit; // floating point unit 1
+def G4_VPU : FuncUnit; // vector permutation unit
+def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4_VFPU : FuncUnit; // vector floating point unit
+
+def G4Itineraries : ProcessorItineraries<
+ [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1,
+ G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<5, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<8, [G4_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<8, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G4_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<3, [G4_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [G4_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [G4_VIU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 0000000..1d9f13f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,112 @@
+//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4P_BPU : FuncUnit; // Branch unit
+def G4P_SLU : FuncUnit; // Store/load unit
+def G4P_SRU : FuncUnit; // special register unit
+def G4P_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4P_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4P_IU3 : FuncUnit; // integer unit 3 (simple)
+def G4P_IU4 : FuncUnit; // integer unit 4 (simple)
+def G4P_FPU1 : FuncUnit; // floating point unit 1
+def G4P_VPU : FuncUnit; // vector permutation unit
+def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4P_VFPU : FuncUnit; // vector floating point unit
+
+def G4PlusItineraries : ProcessorItineraries<
+ [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1,
+ G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<23, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4P_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<37, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<5, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<35, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<21, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<14, [G4P_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<4, [G4P_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<2, [G4P_VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 0000000..a3b73ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,129 @@
+//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5_BPU : FuncUnit; // Branch unit
+def G5_SLU : FuncUnit; // Store/load unit
+def G5_SRU : FuncUnit; // special register unit
+def G5_IU1 : FuncUnit; // integer unit 1 (simple)
+def G5_IU2 : FuncUnit; // integer unit 2 (complex)
+def G5_FPU1 : FuncUnit; // floating point unit 1
+def G5_FPU2 : FuncUnit; // floating point unit 2
+def G5_VPU : FuncUnit; // vector permutation unit
+def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G5_VFPU : FuncUnit; // vector floating point unit
+
+def G5Itineraries : ProcessorItineraries<
+ [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2,
+ G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<3, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<68, [G5_IU1]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<36, [G5_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<6, [G5_IU2]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G5_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<7, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRFID , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G5_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<4, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<3, [G5_BPU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<64, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G5_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<10, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<8, [G5_IU2]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<8, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<5, [G5_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<3, [G5_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<3, [G5_VPU]>]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// G5 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = G5Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
new file mode 100644
index 0000000..267f567
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -0,0 +1,396 @@
+//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER7 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// IBM POWER7 multicore server processor
+// B. Sinharoy, et al.
+// IBM J. Res. & Dev. (55) 3. May/June 2011.
+
+// Scheduling for the P7 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P7_DU1 : FuncUnit;
+def P7_DU2 : FuncUnit;
+def P7_DU3 : FuncUnit;
+def P7_DU4 : FuncUnit;
+def P7_DU5 : FuncUnit;
+def P7_DU6 : FuncUnit;
+
+def P7_LS1 : FuncUnit; // Load/Store pipeline 1
+def P7_LS2 : FuncUnit; // Load/Store pipeline 2
+
+def P7_FX1 : FuncUnit; // FX pipeline 1
+def P7_FX2 : FuncUnit; // FX pipeline 2
+
+// VS pipeline 1 (vector integer ops. always here)
+def P7_VS1 : FuncUnit; // VS pipeline 1
+// VS pipeline 2 (128-bit stores and perms. here)
+def P7_VS2 : FuncUnit; // VS pipeline 2
+
+def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P7_BRU : FuncUnit; // BR unit
+
+// Notes:
+// Each LSU pipeline can also execute FX add and logical instructions.
+// Each LSU pipeline can complete a load or store in one cycle.
+//
+// Each store is broken into two parts, AGEN goes to the LSU while a
+// "data steering" op. goes to the FXU or VSU.
+//
+// FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
+// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles).
+//
+// Frequent FX ops. take only one cycle and results can be used again in the
+// next cycle (there is a self-bypass). Getting results from the other FX
+// pipeline takes an additional cycle.
+//
+// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
+// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
+// Dispatch of an instruction to VS1 that uses four single prec. inputs
+// (either to a float or XC op) prevents dispatch in that cycle to VS2 of any
+// floating point instruction.
+//
+// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
+// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
+// (unlike on the POWER6).
+//
+// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
+// share the same write-back, and have a 5-cycle latency difference, so the
+// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
+// op. has been dispatched to VS1.
+//
+// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
+//
+// Instruction dispatch groups have (at most) four non-branch instructions, and
+// two branches. Unlike on the POWER4/5, a branch does not automatically
+// end the dispatch group, but a second branch must be the last in the group.
+
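To make these notes easier to map onto the entries that follow, here is an annotated reading of one itinerary entry (an editorial sketch, not part of the patch; the class and unit names are the ones defined in this file, and the values mirror the IIC_LdStLoad entry below):

    InstrItinData<IIC_LdStLoad,                // itinerary class being described
      [InstrStage<1, [P7_DU1, P7_DU2,          // occupy one dispatch slot for 1 cycle;
                      P7_DU3, P7_DU4], 0>,     // the trailing 0 starts the next stage
                                               // in the same cycle
       InstrStage<1, [P7_LS1, P7_LS2]>],       // then occupy either LSU pipe for 1 cycle
      [2, 1, 1]>                               // operand cycles: result ready at cycle 2,
                                               // sources read at cycle 1 -- the two-cycle
                                               // FX load-to-use latency noted above

The stage lists model resource occupancy (and hence repeat rate), while the operand-cycle lists supply the latencies the scheduler actually sees.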
+def P7Itineraries : ProcessorItineraries<
+ [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
+ P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2,
+ P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ // FIXME: Add record-form itinerary data.
+ InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<36, [P7_FX1, P7_FX2]>],
+ [36, 1, 1]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<68, [P7_FX1, P7_FX2]>],
+ [68, 1, 1]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_CRU]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1]>, // mtcr
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [6, 1]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1]>],
+ [4, 1]>, // mtctr
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [8, 1, 1]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [33, 1, 1]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [27, 1, 1]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [44, 1, 1]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [32, 1, 1]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1, 1]>,
+ InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [7, 1, 1]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [3, 1, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// P7 machine model for scheduling and other instruction cost heuristics.
+
+def P7Model : SchedMachineModel {
+ let IssueWidth = 6; // 6 instructions (at most 4 non-branch) are dispatched per cycle.
+ // Note that the dispatch bundle size is 6 (including
+ // branches), but the total internal issue bandwidth per
+ // cycle (from all queues) is 8.
+
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ // Try to make sure we have at least 10 dispatch groups in a loop.
+ let LoopMicroOpBufferSize = 40;
+
+ let Itineraries = P7Itineraries;
+}
+
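For context, these itineraries and machine models are wired to CPU names in PPC.td; a minimal sketch of that hookup, with the feature list elided as a placeholder (editorial, not part of the patch):

    // PPC.td associates each CPU string with a SchedMachineModel, and the
    // model's Itineraries field (set above) points back at P7Itineraries.
    def : ProcessorModel<"pwr7", P7Model,
                         [/* feature list elided; see PPC.td */]>;

The LoopMicroOpBufferSize of 40 above is just the dispatch-group arithmetic from the comment: roughly ten groups of four non-branch instructions.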
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
new file mode 100644
index 0000000..69e6d05
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td
@@ -0,0 +1,405 @@
+//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER8 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Scheduling for the P8 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P8_DU1 : FuncUnit;
+def P8_DU2 : FuncUnit;
+def P8_DU3 : FuncUnit;
+def P8_DU4 : FuncUnit;
+def P8_DU5 : FuncUnit;
+def P8_DU6 : FuncUnit;
+def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
+def P8_DU8 : FuncUnit;
+
+// 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
+
+def P8_LU1 : FuncUnit; // Loads or fixed-point operations 1
+def P8_LU2 : FuncUnit; // Loads or fixed-point operations 2
+
+// Load/Store pipelines can handle Stores, fixed-point loads, and simple
+// fixed-point operations.
+def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
+def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
+
+// Fixed Point unit
+def P8_FXU1 : FuncUnit; // FX pipeline 1
+def P8_FXU2 : FuncUnit; // FX pipeline 2
+
+// The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
+// are combined on P7 and newer into a Vector Scalar Unit (VSU).
+// The P8 Instruction latency documents still refer to the unit as the
+// FPU, so keep in mind that FPU==VSU.
+// In contrast to the P7, the VMX units on P8 are symmetric, so no need to
+// split vector integer ops or 128-bit load/store/perms to the specific units.
+def P8_FPU1 : FuncUnit; // VS pipeline 1
+def P8_FPU2 : FuncUnit; // VS pipeline 2
+
+def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P8_BRU : FuncUnit; // BR unit
+
+def P8Itineraries : ProcessorItineraries<
+ [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
+ P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
+ P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2,
+ P8_LU1, P8_LU2,
+ P8_LSU1, P8_LSU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
+ P8_LU2, P8_LSU1, P8_LSU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
+ InstrStage<1, [P8_BRU]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<15, [P8_FXU1, P8_FXU2]>],
+ [15, 1, 1]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<23, [P8_FXU1, P8_FXU2]>],
+ [23, 1, 1]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [1, 1]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
+ InstrStage<1, [P8_BRU]>],
+ [3, 1, 1]>,
+ // FIXME - the Br* groups below are not branch related, so should probably
+ // be renamed.
+ // IIC_BrCR consists of the cr* instructions (crand, crnor, creqv, etc.)
+ // and should be 'First' in dispatch.
+ InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [3, 1, 1]>,
+ // IIC_BrMCR consists of the mcrf instruction.
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [3, 1, 1]>,
+ // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
+ // should be first in the dispatch group.
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 1]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2 ], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 2, 1, 1]>,
+ // Update-Indexed form loads/stores are no longer first and last in the
+ // dispatch group. They are simply cracked, so require DU1,DU2.
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2,
+ P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 4, 1, 1]>,
+ // first+last in dispatch group.
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ // first+last
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2,
+ P8_LU1, P8_LU2]>],
+ [2, 1, 1]>,
+// Stores are dual-issued from the issue queue, so may only take up one
+// dispatch slot. The instruction will be broken into two IOPS. The agen
+// op is issued to the LSU, and the data op (register fetch) is issued
+// to either the LU (GPR store) or the VSU (FPR store).
+ InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2]>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2,
+ P8_LSU1, P8_LSU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2,
+ P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 1, 1, 1]>,
+ // First+last
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_DU2], 0>,
+ InstrStage<1, [P8_DU3], 0>,
+ InstrStage<1, [P8_DU4], 0>,
+ InstrStage<1, [P8_DU5], 0>,
+ InstrStage<1, [P8_DU6], 0>,
+ InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+ InstrStage<1, [P8_LU1, P8_LU2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [6, 1]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_CRU]>],
+ [3, 1]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FXU1, P8_FXU2]>],
+ [4, 1]>, // mtctr
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [8, 1, 1]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [33, 1, 1]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [27, 1, 1]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [44, 1, 1]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [32, 1, 1]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1, 1]>,
+ InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+ P8_DU4, P8_DU5, P8_DU6], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [7, 1, 1]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
+ [3, 1, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// P8 machine model for scheduling and other instruction cost heuristics.
+// P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
+// to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
+
+def P8Model : SchedMachineModel {
+ let IssueWidth = 8; // up to 8 instructions dispatched per cycle.
+ // up to six non-branch instructions.
+ // up to two branches in a dispatch group.
+
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ // Try to make sure we have at least 10 dispatch groups in a loop.
+ let LoopMicroOpBufferSize = 60;
+
+ let Itineraries = P8Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 0000000..c0fcb6c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,245 @@
+//===-- PPCSubtarget.cpp - PPC Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
+static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
+cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
+
+static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
+ cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
+ cl::Hidden);
+
+PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ initializeEnvironment();
+ initSubtargetFeatures(CPU, FS);
+ return *this;
+}
+
+PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS, const PPCTargetMachine &TM)
+ : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le),
+ TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ InstrInfo(*this), TLInfo(TM, *this) {}
+
+void PPCSubtarget::initializeEnvironment() {
+ StackAlignment = 16;
+ DarwinDirective = PPC::DIR_NONE;
+ HasMFOCRF = false;
+ Has64BitSupport = false;
+ Use64BitRegs = false;
+ UseCRBits = false;
+ UseSoftFloat = false;
+ HasAltivec = false;
+ HasSPE = false;
+ HasQPX = false;
+ HasVSX = false;
+ HasP8Vector = false;
+ HasP8Altivec = false;
+ HasP8Crypto = false;
+ HasFCPSGN = false;
+ HasFSQRT = false;
+ HasFRE = false;
+ HasFRES = false;
+ HasFRSQRTE = false;
+ HasFRSQRTES = false;
+ HasRecipPrec = false;
+ HasSTFIWX = false;
+ HasLFIWAX = false;
+ HasFPRND = false;
+ HasFPCVT = false;
+ HasISEL = false;
+ HasPOPCNTD = false;
+ HasBPERMD = false;
+ HasExtDiv = false;
+ HasCMPB = false;
+ HasLDBRX = false;
+ IsBookE = false;
+ HasOnlyMSYNC = false;
+ IsPPC4xx = false;
+ IsPPC6xx = false;
+ IsE500 = false;
+ FeatureMFTB = false;
+ DeprecatedDST = false;
+ HasLazyResolverStubs = false;
+ HasICBT = false;
+ HasInvariantFunctionDescriptors = false;
+ HasPartwordAtomics = false;
+ HasDirectMove = false;
+ IsQPXStackUnaligned = false;
+ HasHTM = false;
+ HasFusion = false;
+ HasFloat128 = false;
+}
+
+void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+ // If cross-compiling with -march=ppc64le without -mcpu
+ if (TargetTriple.getArch() == Triple::ppc64le)
+ CPUName = "ppc64le";
+ else
+ CPUName = "generic";
+ }
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (IsPPC64 && has64BitSupport())
+ Use64BitRegs = true;
+
+ // Set up darwin-specific properties.
+ if (isDarwin())
+ HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that we need to do this if
+ // we're compiling for a BG/Q system regardless of whether or not QPX
+ // is enabled because external functions will assume this alignment.
+ IsQPXStackUnaligned = QPXStackUnaligned;
+ StackAlignment = getPlatformStackAlignment();
+
+ // Determine endianness.
+ // FIXME: Part of the TargetMachine.
+ IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+}
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ bool isDecl = GV->isDeclaration();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
+
+// Embedded cores need aggressive scheduling (and some others also benefit).
+static bool needsAggressiveScheduling(unsigned Directive) {
+ switch (Directive) {
+ default: return false;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ return true;
+ }
+}
+
+bool PPCSubtarget::enableMachineScheduler() const {
+ // Enable MI scheduling for the embedded cores.
+ // FIXME: Enable this for all cores (some additional modeling
+ // may be necessary).
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostRAScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+ return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(isPPC64() ?
+ &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
+void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (needsAggressiveScheduling(DarwinDirective)) {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+
+ // Spilling is generally expensive on all PPC cores, so always enable
+ // register-pressure tracking.
+ Policy.ShouldTrackPressure = true;
+}
+
+bool PPCSubtarget::useAA() const {
+ // Use AA during code generation for the embedded cores.
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
+bool PPCSubtarget::enableSubRegLiveness() const {
+ return UseSubRegLiveness;
+}
+
+unsigned char PPCSubtarget::classifyGlobalReference(
+ const GlobalValue *GV) const {
+ // Note that currently we don't generate non-pic references.
+ // If a caller wants that, this will have to be updated.
+
+ // Large code model always uses the TOC even for local symbols.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
+
+ unsigned char flags = PPCII::MO_PIC_FLAG;
+
+ // Only if the relocation mode is PIC do we have to worry about
+ // interposition. In all other cases we can use a slightly looser standard to
+ // decide how to access the symbol.
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // If it's local, or it's non-default, it can't be interposed.
+ if (!GV->hasLocalLinkage() &&
+ GV->hasDefaultVisibility()) {
+ flags |= PPCII::MO_NLP_FLAG;
+ }
+ return flags;
+ }
+
+ if (GV->isStrongDefinitionForLinker())
+ return flags;
+ return flags | PPCII::MO_NLP_FLAG;
+}
+
+bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
+bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 0000000..4f5c95c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,302 @@
+//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
+
+#include "PPCFrameLowering.h"
+#include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+class StringRef;
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_440,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_A2,
+ DIR_E500mc,
+ DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
+ DIR_PWR6,
+ DIR_PWR6X,
+ DIR_PWR7,
+ DIR_PWR8,
+ DIR_64
+ };
+}
+
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public PPCGenSubtargetInfo {
+protected:
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+  /// stackAlignment - The minimum alignment known to hold for the stack frame
+ /// entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+  /// Used by the ISel to turn on optimizations for POWER4-derived architectures
+ bool HasMFOCRF;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool UseCRBits;
+ bool UseSoftFloat;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasSPE;
+ bool HasQPX;
+ bool HasVSX;
+ bool HasP8Vector;
+ bool HasP8Altivec;
+ bool HasP8Crypto;
+ bool HasFCPSGN;
+ bool HasFSQRT;
+ bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+ bool HasRecipPrec;
+ bool HasSTFIWX;
+ bool HasLFIWAX;
+ bool HasFPRND;
+ bool HasFPCVT;
+ bool HasISEL;
+ bool HasPOPCNTD;
+ bool HasBPERMD;
+ bool HasExtDiv;
+ bool HasCMPB;
+ bool HasLDBRX;
+ bool IsBookE;
+ bool HasOnlyMSYNC;
+ bool IsE500;
+ bool IsPPC4xx;
+ bool IsPPC6xx;
+ bool FeatureMFTB;
+ bool DeprecatedDST;
+ bool HasLazyResolverStubs;
+ bool IsLittleEndian;
+ bool HasICBT;
+ bool HasInvariantFunctionDescriptors;
+ bool HasPartwordAtomics;
+ bool HasDirectMove;
+ bool HasHTM;
+ bool HasFusion;
+ bool HasFloat128;
+
+ /// When targeting QPX running a stock PPC64 Linux kernel where the stack
+ /// alignment has not been changed, we need to keep the 16-byte alignment
+ /// of the stack.
+ bool IsQPXStackUnaligned;
+
+ const PPCTargetMachine &TM;
+ PPCFrameLowering FrameLowering;
+ PPCInstrInfo InstrInfo;
+ PPCTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
+ const PPCTargetMachine &TM);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  /// getStackAlignment - Returns the minimum alignment known to hold for the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ const PPCFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const PPCTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
+ const PPCTargetMachine &getTargetMachine() const { return TM; }
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
+private:
+ void initializeEnvironment();
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
+
+public:
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const;
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+ // useSoftFloat - Return true if soft-float option is turned on.
+ bool useSoftFloat() const { return UseSoftFloat; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+  /// registers in 32-bit mode when possible. This can only be true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// useCRBits - Return true if we should store and manipulate i1 values in
+ /// the individual condition register bits.
+ bool useCRBits() const { return UseCRBits; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV) const;
+
+ // isLittleEndian - True if generating little-endian code
+ bool isLittleEndian() const { return IsLittleEndian; }
+
+ // Specific obvious features.
+ bool hasFCPSGN() const { return HasFCPSGN; }
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasFRE() const { return HasFRE; }
+ bool hasFRES() const { return HasFRES; }
+ bool hasFRSQRTE() const { return HasFRSQRTE; }
+ bool hasFRSQRTES() const { return HasFRSQRTES; }
+ bool hasRecipPrec() const { return HasRecipPrec; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
+ bool hasFPRND() const { return HasFPRND; }
+ bool hasFPCVT() const { return HasFPCVT; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool hasSPE() const { return HasSPE; }
+ bool hasQPX() const { return HasQPX; }
+ bool hasVSX() const { return HasVSX; }
+ bool hasP8Vector() const { return HasP8Vector; }
+ bool hasP8Altivec() const { return HasP8Altivec; }
+ bool hasP8Crypto() const { return HasP8Crypto; }
+ bool hasMFOCRF() const { return HasMFOCRF; }
+ bool hasISEL() const { return HasISEL; }
+ bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasBPERMD() const { return HasBPERMD; }
+ bool hasExtDiv() const { return HasExtDiv; }
+ bool hasCMPB() const { return HasCMPB; }
+ bool hasLDBRX() const { return HasLDBRX; }
+ bool isBookE() const { return IsBookE; }
+ bool hasOnlyMSYNC() const { return HasOnlyMSYNC; }
+ bool isPPC4xx() const { return IsPPC4xx; }
+ bool isPPC6xx() const { return IsPPC6xx; }
+ bool isE500() const { return IsE500; }
+ bool isFeatureMFTB() const { return FeatureMFTB; }
+ bool isDeprecatedDST() const { return DeprecatedDST; }
+ bool hasICBT() const { return HasICBT; }
+ bool hasInvariantFunctionDescriptors() const {
+ return HasInvariantFunctionDescriptors;
+ }
+ bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+ bool hasDirectMove() const { return HasDirectMove; }
+
+ bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
+ unsigned getPlatformStackAlignment() const {
+ if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
+ return 32;
+
+ return 16;
+ }
+ bool hasHTM() const { return HasHTM; }
+ bool hasFusion() const { return HasFusion; }
+ bool hasFloat128() const { return HasFloat128; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return TargetTriple.isMacOSX(); }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
+ bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwinABI(); }
+ bool isELFv2ABI() const;
+
+ bool enableEarlyIfConversion() const override { return hasISEL(); }
+
+ // Scheduling customization.
+ bool enableMachineScheduler() const override;
+ // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ bool enablePostRAScheduler() const override;
+ AntiDepBreakMode getAntiDepBreakMode() const override;
+ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const override;
+ bool useAA() const override;
+
+ bool enableSubRegLiveness() const override;
+
+ /// classifyGlobalReference - Classify a global variable reference for the
+  /// current subtarget according to how we should reference it.
+ unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
new file mode 100644
index 0000000..2dc0d82
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -0,0 +1,170 @@
+//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
+// separate ADDItls[gd]L[32] and GETtlsADDR[32] instructions, both of
+// which define GPR3. A copy is added from GPR3 to the target virtual
+// register of the original instruction. The GETtlsADDR[32] is really
+// a call instruction, so its target register is constrained to be GPR3.
+// This is not true of ADDItls[gd]L[32], but there is a legacy linker
+// optimization bug that requires the target register of the addi of
+// a local- or general-dynamic TLS access sequence to be GPR3.
+//
+// This is done in a late pass so that TLS variable accesses can be
+// fully commoned by MachineCSE.
+//
+//===----------------------------------------------------------------------===//
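+// Schematically (an illustration of the 64-bit general-dynamic form, not a
+// verbatim dump), an instruction such as
+//
+//   %vreg = ADDItlsgdLADDR %in, @sym, @sym
+//
+// is rewritten by this pass into
+//
+//   X3 = ADDItlsgdL %in, @sym
+//   X3 = GETtlsADDR X3, @sym
+//   %vreg = COPY X3
+//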
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-tls-dynamic-call"
+
+namespace llvm {
+ void initializePPCTLSDynamicCallPass(PassRegistry&);
+}
+
+namespace {
+ struct PPCTLSDynamicCall : public MachineFunctionPass {
+ static char ID;
+ PPCTLSDynamicCall() : MachineFunctionPass(ID) {
+ initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCInstrInfo *TII;
+ LiveIntervals *LIS;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE;) {
+ MachineInstr *MI = I;
+
+ if (MI->getOpcode() != PPC::ADDItlsgdLADDR &&
+ MI->getOpcode() != PPC::ADDItlsldLADDR &&
+ MI->getOpcode() != PPC::ADDItlsgdLADDR32 &&
+ MI->getOpcode() != PPC::ADDItlsldLADDR32) {
+ ++I;
+ continue;
+ }
+
+ DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
+
+ unsigned OutReg = MI->getOperand(0).getReg();
+ unsigned InReg = MI->getOperand(1).getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+ unsigned Opc1, Opc2;
+ SmallVector<unsigned, 4> OrigRegs;
+ OrigRegs.push_back(OutReg);
+ OrigRegs.push_back(InReg);
+ OrigRegs.push_back(GPR3);
+
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Opcode inconsistency error");
+ case PPC::ADDItlsgdLADDR:
+ Opc1 = PPC::ADDItlsgdL;
+ Opc2 = PPC::GETtlsADDR;
+ break;
+ case PPC::ADDItlsldLADDR:
+ Opc1 = PPC::ADDItlsldL;
+ Opc2 = PPC::GETtlsldADDR;
+ break;
+ case PPC::ADDItlsgdLADDR32:
+ Opc1 = PPC::ADDItlsgdL32;
+ Opc2 = PPC::GETtlsADDR32;
+ break;
+ case PPC::ADDItlsldLADDR32:
+ Opc1 = PPC::ADDItlsldL32;
+ Opc2 = PPC::GETtlsldADDR32;
+ break;
+ }
+
+ // Expand into two ops built prior to the existing instruction.
+ MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
+ .addReg(InReg);
+ Addi->addOperand(MI->getOperand(2));
+
+ // The ADDItls* instruction is the first instruction in the
+ // repair range.
+ MachineBasicBlock::iterator First = I;
+ --First;
+
+ MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
+ .addReg(GPR3));
+ Call->addOperand(MI->getOperand(3));
+
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
+ .addReg(GPR3);
+
+ // The COPY is the last instruction in the repair range.
+ MachineBasicBlock::iterator Last = I;
+ --Last;
+
+ // Move past the original instruction and remove it.
+ ++I;
+ MI->removeFromParent();
+
+ // Repair the live intervals.
+ LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
+ "PowerPC TLS Dynamic Call Fixup", false, false)
+
+char PPCTLSDynamicCall::ID = 0;
+FunctionPass*
+llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
new file mode 100644
index 0000000..bf165c9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -0,0 +1,156 @@
+//===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When resolving an address using the ELF ABI TOC pointer, two relocations are
+// generally required: one for the high part and one for the low part. Only
+// the high part generally explicitly depends on r2 (the TOC pointer). And, so,
+// we might produce code like this:
+//
+// .Ltmp526:
+// addis 3, 2, .LC12@toc@ha
+// .Ltmp1628:
+// std 2, 40(1)
+// ld 5, 0(27)
+// ld 2, 8(27)
+// ld 11, 16(27)
+// ld 3, .LC12@toc@l(3)
+// rldicl 4, 4, 0, 32
+// mtctr 5
+// bctrl
+// ld 2, 40(1)
+//
+// And there is nothing wrong with this code, as such, but there is a linker bug
+// in binutils (https://sourceware.org/bugzilla/show_bug.cgi?id=18414) that will
+// misoptimize this code sequence to this:
+// nop
+// std r2,40(r1)
+// ld r5,0(r27)
+// ld r2,8(r27)
+// ld r11,16(r27)
+// ld r3,-32472(r2)
+// clrldi r4,r4,32
+// mtctr r5
+// bctrl
+// ld r2,40(r1)
+// because the linker does not know (and does not check) that the value in r2
+// changed in between the instruction using the .LC12@toc@ha (TOC-relative)
+// relocation and the instruction using the .LC12@toc@l(3) relocation.
+// Because it finds these instructions using the relocations (and not by
+// scanning the instructions), it has been asserted that there is no good way
+// to detect the change of r2 in between. As a result, this bug may never be
+// fixed (i.e. it may become part of the definition of the ABI). GCC was
+// updated to add extra dependencies on r2 to instructions using the @toc@l
+// relocations to avoid this problem, and we'll do the same here.
+//
+// This is done as a separate pass because:
+// 1. These extra r2 dependencies are not really properties of the
+// instructions, but rather due to a linker bug, and maybe one day we'll be
+// able to get rid of them when targeting linkers without this bug (and,
+// thus, keeping the logic centralized here will make that
+// straightforward).
+// 2. There are ISel-level peephole optimizations that propagate the @toc@l
+//    relocations to some user instructions, and so the extra dependencies do
+// not apply only to a fixed set of instructions (without undesirable
+// definition replication).
+//
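+// Concretely, the pass below just adds an implicit use of X2 (the TOC
+// pointer, r2) to every instruction that carries a @toc@l-style operand,
+// e.g. the "ld 3, .LC12@toc@l(3)" in the sequence above, which makes the
+// dependence on r2 visible to the rest of the code generator.
+//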
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-toc-reg-deps"
+
+namespace llvm {
+ void initializePPCTOCRegDepsPass(PassRegistry&);
+}
+
+namespace {
+  // PPCTOCRegDeps pass - Add an implicit use of the TOC pointer (X2) to
+  // instructions that carry @toc@l-style relocation operands, as described
+  // in the file comment above.
+ struct PPCTOCRegDeps : public MachineFunctionPass {
+ static char ID;
+ PPCTOCRegDeps() : MachineFunctionPass(ID) {
+ initializePPCTOCRegDepsPass(*PassRegistry::getPassRegistry());
+ }
+
+protected:
+ bool hasTOCLoReloc(const MachineInstr &MI) {
+ if (MI.getOpcode() == PPC::LDtocL ||
+ MI.getOpcode() == PPC::ADDItocL)
+ return true;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if ((MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) == PPCII::MO_TOC_LO)
+ return true;
+ }
+
+ return false;
+ }
+
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (auto &MI : MBB) {
+ if (!hasTOCLoReloc(MI))
+ continue;
+
+ MI.addOperand(MachineOperand::CreateReg(PPC::X2,
+ false /*IsDef*/,
+ true /*IsImp*/));
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCTOCRegDeps, DEBUG_TYPE,
+ "PowerPC TOC Register Dependencies", false, false)
+
+char PPCTOCRegDeps::ID = 0;
+FunctionPass*
+llvm::createPPCTOCRegDepsPass() { return new PPCTOCRegDeps(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 0000000..d24b590
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,407 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPC.h"
+#include "PPCTargetObjectFile.h"
+#include "PPCTargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+static cl::
+opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
+ cl::desc("Disable CTR loops for PPC"));
+
+static cl::
+opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
+ cl::desc("Disable PPC loop preinc prep"));
+
+static cl::opt<bool>
+VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
+ cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
+
+static cl::
+opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
+ cl::desc("Disable VSX Swap Removal for PPC"));
+
+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+ cl::desc("Disable machine peepholes for PPC"));
+
+static cl::opt<bool>
+EnableGEPOpt("ppc-gep-opt", cl::Hidden,
+ cl::desc("Enable optimizations on complex GEPs"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnablePrefetch("enable-ppc-prefetching",
+               cl::desc("Enable software prefetching on PPC"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
+ cl::desc("Add extra TOC register dependencies"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableMachineCombinerPass("ppc-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
+extern "C" void LLVMInitializePowerPCTarget() {
+ // Register the targets
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
+ RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
+
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializePPCBoolRetToIntPass(PR);
+}
+
+/// Return the datalayout string of a subtarget.
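+/// For example (assuming standard ELF triples), this yields
+/// "e-m:e-i64:64-n32:64" for powerpc64le-linux and
+/// "E-m:e-p:32:32-i64:64-n32" for 32-bit powerpc-linux.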
+static std::string getDataLayoutString(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (T.getArch() == Triple::ppc64le)
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!is64Bit || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (is64Bit || !T.isOSDarwin())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (is64Bit)
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
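+// Prepend target-level feature additions to the user feature string. As an
+// illustration, a ppc64 triple at CodeGenOpt::Default with an empty FS ends
+// up with "+invariant-function-descriptors,+crbits,+64bit".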
+static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
+ const Triple &TT) {
+ std::string FullFS = FS;
+
+ // Make sure 64-bit features are available when CPUname is generic
+ if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
+
+ if (OL >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+
+ if (OL != CodeGenOpt::None) {
+ if (!FullFS.empty())
+ FullFS = "+invariant-function-descriptors," + FullFS;
+ else
+ FullFS = "+invariant-function-descriptors";
+ }
+
+ return FullFS;
+}
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file then it's going to be a linux ELF
+ // object file.
+ if (TT.isOSDarwin())
+ return make_unique<TargetLoweringObjectFileMachO>();
+
+ return make_unique<PPC64LinuxTargetObjectFile>();
+}
+
+static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
+ const TargetOptions &Options) {
+ if (Options.MCOptions.getABIName().startswith("elfv1"))
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ else if (Options.MCOptions.getABIName().startswith("elfv2"))
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+
+ assert(Options.MCOptions.getABIName().empty() &&
+ "Unknown target-abi option!");
+
+ if (!TT.isMacOSX()) {
+ switch (TT.getArch()) {
+ case Triple::ppc64le:
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+ case Triple::ppc64:
+ return PPCTargetMachine::PPC_ABI_ELFv1;
+ default:
+ // Fallthrough.
+ ;
+ }
+ }
+ return PPCTargetMachine::PPC_ABI_UNKNOWN;
+}
+
+// The FeatureString here is a little subtle. We are modifying the feature
+// string with what are (currently) non-function specific overrides as it goes
+// into the LLVMTargetMachine constructor and then using the stored value in the
+// Subtarget constructor below it.
+PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
+ computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
+ TLOF(createTLOF(getTargetTriple())),
+ TargetABI(computeTargetABI(TT, Options)),
+ Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
+
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
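+  // As a rough check: starting from ~5 correct bits and doubling per step
+  // gives 5 -> 10 -> 20 -> 40, so 3 steps cover float's 23 fraction bits and
+  // a 4th step (80 bits) covers double's 52; starting from ~14 bits, 1 step
+  // (28) covers float and 2 steps (56) cover double, which is what the
+  // values below encode.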
+ unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+ RefinementSteps64 = RefinementSteps + 1;
+
+ this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
+ this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
+
+ this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
+ this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
+
+ initAsmInfo();
+}
+
+PPCTargetMachine::~PPCTargetMachine() {}
+
+void PPC32TargetMachine::anchor() { }
+
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+
+void PPC64TargetMachine::anchor() { }
+
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+
+const PPCSubtarget *
+PPCTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ bool SoftFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+ // If the soft float attribute is set on the function turn on the soft float
+ // subtarget feature.
+ if (SoftFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
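+  // (As a result, two otherwise-identical functions that differ only in the
+  // "use-soft-float" attribute map to different SubtargetMap keys below and
+  // therefore get distinct subtargets.)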
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<PPCSubtarget>(
+ TargetTriple, CPU,
+ // FIXME: It would be good to have the subtarget additions here
+ // not necessary. Anything that turns them on/off (overrides) ends
+ // up being put at the end of the feature string, but the defaults
+ // shouldn't require adding them. Fixing this means pulling Feature64Bit
+ // out of most of the target cpus in the .td file and making it set only
+ // as part of initialization via the TargetTriple.
+ computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
+ }
+ return I.get();
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// PPC Code Generator Pass Configuration Options.
+class PPCPassConfig : public TargetPassConfig {
+public:
+ PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PPCTargetMachine &getPPCTargetMachine() const {
+ return getTM<PPCTargetMachine>();
+ }
+
+ void addIRPasses() override;
+ bool addPreISel() override;
+ bool addILPOpts() override;
+ bool addInstSelector() override;
+ void addMachineSSAOptimization() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+} // namespace
+
+TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new PPCPassConfig(this, PM);
+}
+
+void PPCPassConfig::addIRPasses() {
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCBoolRetToIntPass());
+ addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+
+ // For the BG/Q (or if explicitly requested), add explicit data prefetch
+ // intrinsics.
+ bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
+ getOptLevel() != CodeGenOpt::None;
+ if (EnablePrefetch.getNumOccurrences() > 0)
+ UsePrefetching = EnablePrefetch;
+ if (UsePrefetching)
+ addPass(createPPCLoopDataPrefetchPass());
+
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
+ // Call SeparateConstOffsetFromGEP pass to extract constants within indices
+ // and lower a GEP with multiple indices to either arithmetic operations or
+ // multiple GEPs with single index.
+ addPass(createSeparateConstOffsetFromGEPPass(TM, true));
+ // Call EarlyCSE pass to find and remove subexpressions in the lowered
+ // result.
+ addPass(createEarlyCSEPass());
+ // Do loop invariant code motion in case part of the lowered result is
+ // invariant.
+ addPass(createLICMPass());
+ }
+
+ TargetPassConfig::addIRPasses();
+}
+
+bool PPCPassConfig::addPreISel() {
+ if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
+
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoops(getPPCTargetMachine()));
+
+ return false;
+}
+
+bool PPCPassConfig::addILPOpts() {
+ addPass(&EarlyIfConverterID);
+
+ if (EnableMachineCombinerPass)
+ addPass(&MachineCombinerID);
+
+ return true;
+}
+
+bool PPCPassConfig::addInstSelector() {
+ // Install an instruction selector.
+ addPass(createPPCISelDag(getPPCTargetMachine()));
+
+#ifndef NDEBUG
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoopsVerify());
+#endif
+
+ addPass(createPPCVSXCopyPass());
+ return false;
+}
+
+void PPCPassConfig::addMachineSSAOptimization() {
+ TargetPassConfig::addMachineSSAOptimization();
+ // For little endian, remove where possible the vector swap instructions
+ // introduced at code generation to normalize vector element order.
+ if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
+ !DisableVSXSwapRemoval)
+ addPass(createPPCVSXSwapRemovalPass());
+ // Target-specific peephole cleanups performed after instruction
+ // selection.
+ if (!DisableMIPeephole) {
+ addPass(createPPCMIPeepholePass());
+ addPass(&DeadMachineInstructionElimID);
+ }
+}
+
+void PPCPassConfig::addPreRegAlloc() {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+ &PPCVSXFMAMutateID);
+ if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_)
+ addPass(createPPCTLSDynamicCallPass());
+ if (EnableExtraTOCRegDeps)
+ addPass(createPPCTOCRegDepsPass());
+}
+
+void PPCPassConfig::addPreSched2() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
+}
+
+void PPCPassConfig::addPreEmitPass() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCEarlyReturnPass(), false);
+ // Must run branch selection immediately preceding the asm printer.
+ addPass(createPPCBranchSelectionPass(), false);
+}
+
+TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(PPCTTIImpl(this, F));
+ });
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 0000000..6496339
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,84 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
+
+#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+public:
+ enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 };
+private:
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ PPCABI TargetABI;
+ PPCSubtarget Subtarget;
+
+ mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
+
+public:
+ PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ ~PPCTargetMachine() override;
+
+ const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
+
+ // Pass Pipeline Configuration
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetIRAnalysis getTargetIRAnalysis() override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+ bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
+ bool isPPC64() const {
+ const Triple &TT = getTargetTriple();
+ return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
+  }
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
+public:
+ PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
+public:
+ PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
new file mode 100644
index 0000000..798bb9d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -0,0 +1,61 @@
+//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetObjectFile.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+
+using namespace llvm;
+
+void
+PPC64LinuxTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
+MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
+ // when we have a constant that contains global relocations. This is
+ // necessary because of this ABI's handling of pointers to functions in
+ // a shared library. The address of a function is actually the address
+ // of a function descriptor, which resides in the .opd section. Generated
+ // code uses the descriptor directly rather than going via the GOT as some
+ // other ABIs do, which means that initialized function pointers must
+ // reference the descriptor. The linker must convert copy relocs of
+ // pointers to functions in shared libraries into dynamic relocations,
+ // because of an ordering problem with initialization of copy relocs and
+ // PLT entries. The dynamic relocation will be initialized by the dynamic
+ // linker, so we must use DataRelROSection instead of ReadOnlySection.
+ // For more information, see the description of ELIMINATE_COPY_RELOCS in
+ // GNU ld.
+ if (Kind.isReadOnly()) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ if (GVar && GVar->isConstant() && GVar->getInitializer()->needsRelocation())
+ Kind = SectionKind::getReadOnlyWithRel();
+ }
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
+ Mang, TM);
+}
+
+const MCExpr *PPC64LinuxTargetObjectFile::
+getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext());
+ return MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(0x8000, getContext()),
+ getContext());
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
new file mode 100644
index 0000000..d248791
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -0,0 +1,35 @@
+//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ /// PPC64LinuxTargetObjectFile - This implementation is used for
+ /// 64-bit PowerPC Linux.
+ class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+
+ /// \brief Describe a TLS variable address within debug info.
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
new file mode 100644
index 0000000..dbe7617
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -0,0 +1,27 @@
+//===-- PPCTargetStreamer.h - PPC Target Streamer --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class PPCTargetStreamer : public MCTargetStreamer {
+public:
+ PPCTargetStreamer(MCStreamer &S);
+ ~PPCTargetStreamer() override;
+ virtual void emitTCEntry(const MCSymbol &S) = 0;
+ virtual void emitMachine(StringRef CPU) = 0;
+ virtual void emitAbiVersion(int AbiVersion) = 0;
+ virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 0000000..cd86dab
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,413 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppctti"
+
+static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
+cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+TargetTransformInfo::PopcntSupportKind
+PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (ST->hasPOPCNTD() && TyWidth <= 64)
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
+}
+
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ if (DisablePPCConstHoist)
+ return BaseT::getIntImmCost(Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Basic;
+
+ if (isInt<32>(Imm.getSExtValue())) {
+ // A constant that can be materialized using lis.
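+      // (For example, 0x00230000 has its low 16 bits clear, so a single lis
+      // materializes it.)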
+ if ((Imm.getZExtValue() & 0xFFFF) == 0)
+ return TTI::TCC_Basic;
+
+ return 2 * TTI::TCC_Basic;
+ }
+ }
+
+ return 4 * TTI::TCC_Basic;
+}
+
+int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
+ if (DisablePPCConstHoist)
+ return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ switch (IID) {
+ default:
+ return TTI::TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ }
+ return PPCTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
+ if (DisablePPCConstHoist)
+ return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ unsigned ImmIdx = ~0U;
+ bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
+ ZeroFree = false;
+ switch (Opcode) {
+ default:
+ return TTI::TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
+ case Instruction::And:
+ RunFree = true; // (for the rotate-and-mask instructions)
+ // Fallthrough...
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Xor:
+ ShiftedFree = true;
+ // Fallthrough...
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ ImmIdx = 1;
+ break;
+ case Instruction::ICmp:
+ UnsignedFree = true;
+ ImmIdx = 1;
+ // Fallthrough... (zero comparisons can use record-form instructions)
+ case Instruction::Select:
+ ZeroFree = true;
+ break;
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Store:
+ break;
+ }
+
+ if (ZeroFree && Imm == 0)
+ return TTI::TCC_Free;
+
+ if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+
+ if (RunFree) {
+ if (Imm.getBitWidth() <= 32 &&
+ (isShiftedMask_32(Imm.getZExtValue()) ||
+ isShiftedMask_32(~Imm.getZExtValue())))
+ return TTI::TCC_Free;
+
+ if (ST->isPPC64() &&
+ (isShiftedMask_64(Imm.getZExtValue()) ||
+ isShiftedMask_64(~Imm.getZExtValue())))
+ return TTI::TCC_Free;
+ }
+
+ if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+
+ if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
+ return TTI::TCC_Free;
+ }
+
+ return PPCTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+void PPCTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ if (ST->getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 is in-order with a deep pipeline, and concatenation unrolling
+ // helps expose latency-hiding opportunities to the instruction scheduler.
+ UP.Partial = UP.Runtime = true;
+
+ // We unroll a lot on the A2 (hundreds of instructions), and the benefits
+ // often outweigh the cost of a division to compute the trip count.
+ UP.AllowExpensiveTripCount = true;
+ }
+
+ BaseT::getUnrollingPreferences(L, UP);
+}
+
+bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+ // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
+ // on combining the loads generated for consecutive accesses, and failure to
+  // do so is particularly expensive. Aggressive interleaving makes that
+  // combining much more likely to succeed (compared to relying only on
+  // concatenation unrolling).
+ if (ST->getDarwinDirective() == PPC::DIR_A2)
+ return true;
+
+ return LoopHasReductions;
+}
+
+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+ return true;
+}
+
+unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
+ if (Vector && !ST->hasAltivec() && !ST->hasQPX())
+ return 0;
+ return ST->hasVSX() ? 64 : 32;
+}
+
+unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
+ if (Vector) {
+ if (ST->hasQPX()) return 256;
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+
+}
+
+unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For P7 and P8, floating-point instructions have a 6-cycle latency and
+ // there are two execution units, so unroll by 12x for latency hiding.
+ if (Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8)
+ return 12;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
+
+int PPCTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fallback to the default implementation.
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
+}
+
+int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+  // PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). We need one such shuffle instruction for each actual
+ // register (this is not true for arbitrary shuffles, but is true for the
+ // structured types of shuffles covered by TTI::ShuffleKind).
+ return LT.first;
+}
+
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
+}
+
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
+ // Double-precision scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
+ // Floating point scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ }
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 2;
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ LHSPenalty += 7;
+
+ // Vector element insert/extract with Altivec is very expensive,
+ // because they require store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+}
+
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+
+ // Aligned loads and stores are easy.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+ return Cost;
+
+ bool IsAltivecType = ST->hasAltivec() &&
+ (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
+ LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
+ bool IsVSXType = ST->hasVSX() &&
+ (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
+ bool IsQPXType = ST->hasQPX() &&
+ (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
+
+ // If we can use the permutation-based load sequence, then this is also
+ // relatively cheap (not counting loop-invariant instructions): one load plus
+ // one permute (the last load in a series has extra cost, but we're
+ // neglecting that here). Note that on the P7, we should do unaligned loads
+ // for Altivec types using the VSX instructions, but that's more expensive
+ // than using the permutation-based load sequence. On the P8, that's no
+ // longer true.
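+ // (That is, such an unaligned load that legalizes to a single register is
+ // estimated at the base cost plus one permute.)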
+ if (Opcode == Instruction::Load &&
+ ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
+ Alignment >= LT.second.getScalarType().getStoreSize())
+ return Cost + LT.first; // Add the cost of the permutations.
+
+ // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
+ // P7, unaligned vector loads are more expensive than the permutation-based
+ // load sequence, so that might be used instead, but regardless, the net cost
+ // is about the same (not counting loop-invariant instructions).
+ if (IsVSXType || (ST->hasVSX() && IsAltivecType))
+ return Cost;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+
+ // Add the cost of each scalar load or store.
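+ // For example, a 16-byte vector with 4-byte alignment that legalizes to a
+ // single register needs 16/4 = 4 scalar accesses, i.e. 3 additional
+ // operations beyond the base cost.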
+ Cost += LT.first*(SrcBytes/Alignment-1);
+
+ // For a vector type, there is also scalarization overhead (only for
+ // stores, loads are expanded using the vector-load + permutation sequence,
+ // which is much less expensive).
+ if (Src->isVectorTy() && Opcode == Instruction::Store)
+ for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+
+ return Cost;
+}
+
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+ // First, the cost of the load/store operation itself.
+ int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+ // PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). For each result vector, we need one shuffle per incoming
+ // vector (except that the first shuffle can take two incoming vectors
+ // because it does not need to take itself).
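+ // For example, with Factor == 2 and a type that legalizes to two registers
+ // (LT.first == 2), this adds 2 * (2 - 1) = 2 extra shuffles beyond the
+ // memory cost.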
+ Cost += Factor*(LT.first-1);
+
+ return Cost;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
new file mode 100644
index 0000000..04c1b02
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -0,0 +1,97 @@
+//===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// to the PPC target machine. It uses the target's detailed information to
+/// PPC target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
+ typedef BasicTTIImplBase<PPCTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ const PPCSubtarget *getST() const { return ST; }
+ const PPCTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ PPCTTIImpl(const PPCTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ PPCTTIImpl(PPCTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ using BaseT::getIntImmCost;
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ bool enableAggressiveInterleaving(bool LoopHasReductions);
+ bool enableInterleavedAccessVectorization();
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ int getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
new file mode 100644
index 0000000..782583c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -0,0 +1,188 @@
+//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass which deals with the complexity of generating legal VSX register
+// copies to/from register classes which partially overlap with the VSX
+// register file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCHazardRecognizers.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const TargetInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+ bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+ }
+
+ bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if ( IsVSReg(DstMO.getReg(), MRI) &&
+ !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI) ||
+ IsVSSReg(SrcMO.getReg(), MRI) ||
+ IsVSFReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVSFReg(DstMO.getReg(), MRI) ||
+ IsVSSReg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+ TII = STI.getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
new file mode 100644
index 0000000..6b19a2f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -0,0 +1,372 @@
+//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mutates the form of VSX FMA instructions to avoid unnecessary
+// copies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
+cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
+
+#define DEBUG_TYPE "ppc-vsx-fma-mutate"
+
+namespace llvm { namespace PPC {
+ int getAltVSXFMAOpcode(uint16_t Opcode);
+} }
+
+namespace {
+ // PPCVSXFMAMutate pass - Rewrites A-type VSX FMA instructions into their
+ // M-type forms when the addend is defined by a register copy and one of the
+ // product operands is killed by the FMA, so that the copy can be removed.
+ struct PPCVSXFMAMutate : public MachineFunctionPass {
+ static char ID;
+ PPCVSXFMAMutate() : MachineFunctionPass(ID) {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+
+ // The default (A-type) VSX FMA form kills the addend (it is taken from
+ // the target register, which is then updated to reflect the result of
+ // the FMA). If the instruction, however, kills one of the registers
+ // used for the product, then we can use the M-form instruction (which
+ // will take that value from the to-be-defined register).
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ continue;
+
+ // This pass is run after register coalescing, and so we're looking for
+ // a situation like this:
+ // ...
+ // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // ...
+ // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
+ // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
+ // ...
+ // Where we can eliminate the copy by changing from the A-type to the
+ // M-type instruction. Specifically, for this example, this means:
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // is replaced by:
+ // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
+ // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
+ // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+
+ SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
+
+ VNInfo *AddendValNo =
+ LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+
+ // This can be null if the register is undef.
+ if (!AddendValNo)
+ continue;
+
+ MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
+
+ // The addend and this instruction must be in the same block.
+
+ if (!AddendMI || AddendMI->getParent() != MI->getParent())
+ continue;
+
+ // The addend must be a full copy within the same register class.
+
+ if (!AddendMI->isFullCopy())
+ continue;
+
+ unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+ MRI.getRegClass(AddendSrcReg))
+ continue;
+ } else {
+ // If AddendSrcReg is a physical register, make sure the destination
+ // register class contains it.
+ if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+ ->contains(AddendSrcReg))
+ continue;
+ }
+
+ // In theory, there could be other uses of the addend copy before this
+ // fma. We could deal with this, but that would require additional
+ // logic below and I suspect it will not occur in any relevant
+ // situations. Additionally, check whether the copy source is killed
+ // prior to the fma. In order to replace the addend here with the
+ // source of the copy, it must still be live here. We can't use
+ // interval testing for a physical register, so as long as we're
+ // walking the MIs we may as well test liveness here.
+ //
+ // FIXME: There is a case that occurs in practice, like this:
+ // %vreg9<def> = COPY %F1; VSSRC:%vreg9
+ // ...
+ // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9
+ // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9
+ // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC:
+ // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC:
+ // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC:
+ // which prevents an otherwise-profitable transformation.
+ bool OtherUsers = false, KillsAddendSrc = false;
+ for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
+ J != JE; --J) {
+ if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
+ OtherUsers = true;
+ break;
+ }
+ if (J->modifiesRegister(AddendSrcReg, TRI) ||
+ J->killsRegister(AddendSrcReg, TRI)) {
+ KillsAddendSrc = true;
+ break;
+ }
+ }
+
+ if (OtherUsers || KillsAddendSrc)
+ continue;
+
+ // Find one of the product operands that is killed by this instruction.
+
+ unsigned KilledProdOp = 0, OtherProdOp = 0;
+ if (LIS->getInterval(MI->getOperand(2).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 2;
+ OtherProdOp = 3;
+ } else if (LIS->getInterval(MI->getOperand(3).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 3;
+ OtherProdOp = 2;
+ }
+
+ // If there are no killed product operands, then this transformation is
+ // likely not profitable.
+ if (!KilledProdOp)
+ continue;
+
+ // If the addend copy is used only by this MI, then the addend source
+ // register is likely not live here. This could be fixed (based on the
+ // legality checks above, the live range for the addend source register
+ // could be extended), but it seems likely that such a trivial copy can
+ // be coalesced away later, and thus is not worth the effort.
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+ !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
+ continue;
+
+ // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
+
+ unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
+ unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
+
+ unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
+ unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
+ unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
+
+ bool AddRegKill = AddendMI->getOperand(1).isKill();
+ bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
+ bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
+
+ bool AddRegUndef = AddendMI->getOperand(1).isUndef();
+ bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
+ bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
+
+ unsigned OldFMAReg = MI->getOperand(0).getReg();
+
+ // The transformation doesn't work well with things like:
+ // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
+ // so leave such things alone.
+ if (OldFMAReg == KilledProdReg)
+ continue;
+
+ // If there isn't a class that fits, we can't perform the transform.
+ // This is needed for correctness with a mixture of VSX and Altivec
+ // instructions to make sure that a low VSX register is not assigned to
+ // the Altivec instruction.
+ if (!MRI.constrainRegClass(KilledProdReg,
+ MRI.getRegClass(OldFMAReg)))
+ continue;
+
+ assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
+ "Addend copy not tied to old FMA output!");
+
+ DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+
+ MI->getOperand(0).setReg(KilledProdReg);
+ MI->getOperand(1).setReg(KilledProdReg);
+ MI->getOperand(3).setReg(AddendSrcReg);
+ MI->getOperand(2).setReg(OtherProdReg);
+
+ MI->getOperand(0).setSubReg(KilledProdSubReg);
+ MI->getOperand(1).setSubReg(KilledProdSubReg);
+ MI->getOperand(3).setSubReg(AddSubReg);
+ MI->getOperand(2).setSubReg(OtherProdSubReg);
+
+ MI->getOperand(1).setIsKill(KilledProdRegKill);
+ MI->getOperand(3).setIsKill(AddRegKill);
+ MI->getOperand(2).setIsKill(OtherProdRegKill);
+
+ MI->getOperand(1).setIsUndef(KilledProdRegUndef);
+ MI->getOperand(3).setIsUndef(AddRegUndef);
+ MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+
+ MI->setDesc(TII->get(AltOpc));
+
+ DEBUG(dbgs() << " -> " << *MI);
+
+ // The killed product operand was killed here, so we can reuse it now
+ // for the result of the fma.
+
+ LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
+ VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
+ for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
+ UI != UE;) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+
+ // Don't replace the result register of the copy we're about to erase.
+ if (UseMI == AddendMI)
+ continue;
+
+ UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
+ }
+
+ // Extend the live intervals of the killed product operand to hold the
+ // fma result.
+
+ LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
+ for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
+ AI != AE; ++AI) {
+ // Don't add the segment that corresponds to the original copy.
+ if (AI->valno == AddendValNo)
+ continue;
+
+ VNInfo *NewFMAValNo =
+ NewFMAInt.getNextValue(AI->start,
+ LIS->getVNInfoAllocator());
+
+ NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
+ NewFMAValNo));
+ }
+ DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+
+ // Extend the live interval of the addend source (it might end at the
+ // copy to be removed, or somewhere in between there and here). This
+ // is necessary only if it is a physical register.
+ if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
+ for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
+ ++Units) {
+ unsigned Unit = *Units;
+
+ LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
+ AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
+ FMAIdx.getRegSlot());
+ DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
+ }
+
+ FMAInt.removeValNo(FMAValNo);
+ DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
+
+ // Remove the (now unused) copy.
+
+ DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
+ LIS->RemoveMachineInstrFromMaps(AddendMI);
+ AddendMI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX then go ahead and return without doing
+ // anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ TII = STI.getInstrInfo();
+
+ bool Changed = false;
+
+ if (DisableVSXFMAMutate)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+
+char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
+
+char PPCVSXFMAMutate::ID = 0;
+FunctionPass *llvm::createPPCVSXFMAMutatePass() {
+ return new PPCVSXFMAMutate();
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
new file mode 100644
index 0000000..27c540f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -0,0 +1,1003 @@
+//===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass analyzes vector computations and removes unnecessary
+// doubleword swaps (xxswapd instructions). This pass is performed
+// only for little-endian VSX code generation.
+//
+// For this specific case, loads and stores of v4i32, v4f32, v2i64,
+// and v2f64 vectors are inefficient. These are implemented using
+// the lxvd2x and stxvd2x instructions, which invert the order of
+// doublewords in a vector register. Thus code generation inserts
+// an xxswapd after each such load, and prior to each such store.
+//
+// The extra xxswapd instructions reduce performance. The purpose
+// of this pass is to reduce the number of xxswapd instructions
+// required for correctness.
+//
+// The primary insight is that much code that operates on vectors
+// does not care about the relative order of elements in a register,
+// so long as the correct memory order is preserved. If we have a
+// computation where all input values are provided by lxvd2x/xxswapd,
+// all outputs are stored using xxswapd/stxvd2x, and all intermediate
+// computations are lane-insensitive (independent of element order),
+// then all the xxswapd instructions associated with the loads and
+// stores may be removed without changing observable semantics.
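+//
+// Schematically, a web such as:
+//   %v1 = LXVD2X ...            (doubleword-swapped load)
+//   %v2 = XXPERMDI %v1, %v1, 2  (swap inserted after the load)
+//   %v3 = <lane-insensitive op> %v2, ...
+//   %v4 = XXPERMDI %v3, %v3, 2  (swap inserted before the store)
+//   STXVD2X %v4, ...            (doubleword-swapped store)
+// produces the same memory image if both XXPERMDI swaps are removed,
+// because the element reordering done by the load and the store cancels
+// out for element-order-insensitive computations.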
+//
+// This pass uses standard equivalence class infrastructure to create
+// maximal webs of computations fitting the above description. Each
+// such web is then optimized by removing its unnecessary xxswapd
+// instructions.
+//
+// There are some lane-sensitive operations for which we can still
+// permit the optimization, provided we modify those operations
+// accordingly. Such operations are identified as using "special
+// handling" within this module.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-vsx-swaps"
+
+namespace llvm {
+ void initializePPCVSXSwapRemovalPass(PassRegistry&);
+}
+
+namespace {
+
+// A PPCVSXSwapEntry is created for each machine instruction that
+// is relevant to a vector computation.
+struct PPCVSXSwapEntry {
+ // Pointer to the instruction.
+ MachineInstr *VSEMI;
+
+ // Unique ID (position in the swap vector).
+ int VSEId;
+
+ // Attributes of this node.
+ unsigned int IsLoad : 1;
+ unsigned int IsStore : 1;
+ unsigned int IsSwap : 1;
+ unsigned int MentionsPhysVR : 1;
+ unsigned int IsSwappable : 1;
+ unsigned int MentionsPartialVR : 1;
+ unsigned int SpecialHandling : 3;
+ unsigned int WebRejected : 1;
+ unsigned int WillRemove : 1;
+};
+
+enum SHValues {
+ SH_NONE = 0,
+ SH_EXTRACT,
+ SH_INSERT,
+ SH_NOSWAP_LD,
+ SH_NOSWAP_ST,
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_COPYWIDEN
+};
+
+struct PPCVSXSwapRemoval : public MachineFunctionPass {
+
+ static char ID;
+ const PPCInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ // Swap entries are allocated in a vector for better performance.
+ std::vector<PPCVSXSwapEntry> SwapVector;
+
+ // A mapping is maintained between machine instructions and
+ // their swap entries. The key is the address of the MI.
+ DenseMap<MachineInstr*, int> SwapMap;
+
+ // Equivalence classes are used to gather webs of related computation.
+ // Swap entries are represented by their VSEId fields.
+ EquivalenceClasses<int> *EC;
+
+ PPCVSXSwapRemoval() : MachineFunctionPass(ID) {
+ initializePPCVSXSwapRemovalPass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize data structures.
+ void initialize(MachineFunction &MFParm);
+
+ // Walk the machine instructions to gather vector usage information.
+ // Return true iff vector mentions are present.
+ bool gatherVectorInstructions();
+
+ // Add an entry to the swap vector and swap map.
+ int addSwapEntry(MachineInstr *MI, PPCVSXSwapEntry &SwapEntry);
+
+ // Hunt backwards through COPY and SUBREG_TO_REG chains for a
+ // source register. VecIdx indicates the swap vector entry to
+ // mark as mentioning a physical register if the search leads
+ // to one.
+ unsigned lookThruCopyLike(unsigned SrcReg, unsigned VecIdx);
+
+ // Generate equivalence classes for related computations (webs).
+ void formWebs();
+
+ // Analyze webs and determine those that cannot be optimized.
+ void recordUnoptimizableWebs();
+
+ // Record which swap instructions can be safely removed.
+ void markSwapsForRemoval();
+
+ // Remove swaps and update other instructions requiring special
+ // handling. Return true iff any changes are made.
+ bool removeSwaps();
+
+ // Insert a swap instruction from SrcReg to DstReg at the given
+ // InsertPoint.
+ void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg);
+
+ // Update instructions requiring special handling.
+ void handleSpecialSwappables(int EntryIdx);
+
+ // Dump a description of the entries in the swap vector.
+ void dumpSwapVector();
+
+ // Return true iff the given register is in the given class.
+ bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return RC->hasSubClassEq(MRI->getRegClass(Reg));
+ return RC->contains(Reg);
+ }
+
+ // Return true iff the given register is a full vector register.
+ bool isVecReg(unsigned Reg) {
+ return (isRegInClass(Reg, &PPC::VSRCRegClass) ||
+ isRegInClass(Reg, &PPC::VRRCRegClass));
+ }
+
+ // Return true iff the given register is a partial vector register.
+ bool isScalarVecReg(unsigned Reg) {
+ return (isRegInClass(Reg, &PPC::VSFRCRegClass) ||
+ isRegInClass(Reg, &PPC::VSSRCRegClass));
+ }
+
+ // Return true iff the given register mentions all or part of a
+ // vector register. Also sets Partial to true if the mention
+ // is for just the floating-point register overlap of the register.
+ bool isAnyVecReg(unsigned Reg, bool &Partial) {
+ if (isScalarVecReg(Reg))
+ Partial = true;
+ return isScalarVecReg(Reg) || isVecReg(Reg);
+ }
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+
+ bool Changed = false;
+ initialize(MF);
+
+ if (gatherVectorInstructions()) {
+ formWebs();
+ recordUnoptimizableWebs();
+ markSwapsForRemoval();
+ Changed = removeSwaps();
+ }
+
+ // FIXME: See the allocation of EC in initialize().
+ delete EC;
+ return Changed;
+ }
+};
+
+// Initialize data structures for this pass. In particular, clear the
+// swap vector and allocate the equivalence class mapping before
+// processing each function.
+void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+
+ // An initial vector size of 256 appears to work well in practice.
+ // Small/medium functions with vector content tend not to incur a
+ // reallocation at this size. Three of the vector tests in
+ // projects/test-suite reallocate, which seems like a reasonable rate.
+ const int InitialVectorSize(256);
+ SwapVector.clear();
+ SwapVector.reserve(InitialVectorSize);
+
+ // FIXME: Currently we allocate EC each time because we don't have
+ // access to the set representation on which to call clear(). Should
+ // consider adding a clear() method to the EquivalenceClasses class.
+ EC = new EquivalenceClasses<int>;
+}
+
+// Create an entry in the swap vector for each instruction that mentions
+// a full vector register, recording various characteristics of the
+// instructions there.
+bool PPCVSXSwapRemoval::gatherVectorInstructions() {
+ bool RelevantFunction = false;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+
+ if (MI.isDebugValue())
+ continue;
+
+ bool RelevantInstr = false;
+ bool Partial = false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (isAnyVecReg(Reg, Partial)) {
+ RelevantInstr = true;
+ break;
+ }
+ }
+
+ if (!RelevantInstr)
+ continue;
+
+ RelevantFunction = true;
+
+ // Create a SwapEntry initialized to zeros, then fill in the
+ // instruction and ID fields before pushing it to the back
+ // of the swap vector.
+ PPCVSXSwapEntry SwapEntry{};
+ int VecIdx = addSwapEntry(&MI, SwapEntry);
+
+ switch(MI.getOpcode()) {
+ default:
+ // Unless noted otherwise, an instruction is considered
+ // safe for the optimization. There are a large number of
+ // such true-SIMD instructions (all vector math, logical,
+ // select, compare, etc.). However, if the instruction
+ // mentions a partial vector register and does not have
+ // special handling defined, it is not swappable.
+ if (Partial)
+ SwapVector[VecIdx].MentionsPartialVR = 1;
+ else
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::XXPERMDI: {
+ // This is a swap if it is of the form XXPERMDI t, s, s, 2.
+ // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
+ // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
+ // for example. We have to look through chains of COPY and
+ // SUBREG_TO_REG to find the real source value for comparison.
+ // If the real source value is a physical register, then mark the
+ // XXPERMDI as mentioning a physical register.
+ int immed = MI.getOperand(3).getImm();
+ if (immed == 2) {
+ unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+ VecIdx);
+ unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+ VecIdx);
+ if (trueReg1 == trueReg2)
+ SwapVector[VecIdx].IsSwap = 1;
+ else {
+ // We can still handle these if the two registers are not
+ // identical, by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+ }
+ // This is a doubleword splat if it is of the form
+ // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we
+ // must look through chains of copy-likes to find the source
+ // register. We turn off the marking for mention of a physical
+ // register, because splatting it is safe; the optimization
+ // will not swap the value in the physical register. Whether
+ // or not the two input registers are identical, we can handle
+ // these by adjusting the form of the XXPERMDI.
+ } else if (immed == 0 || immed == 3) {
+
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+
+ unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+ VecIdx);
+ unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+ VecIdx);
+ if (trueReg1 == trueReg2)
+ SwapVector[VecIdx].MentionsPhysVR = 0;
+
+ } else {
+ // We can still handle these by adjusting the form of the XXPERMDI.
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI;
+ }
+ break;
+ }
+ case PPC::LVX:
+ // Non-permuting loads are currently unsafe. We can use special
+ // handling for this in the future. By not marking these as
+ // IsSwap, we ensure computations containing them will be rejected
+ // for now.
+ SwapVector[VecIdx].IsLoad = 1;
+ break;
+ case PPC::LXVD2X:
+ case PPC::LXVW4X:
+ // Permuting loads are marked as both load and swap, and are
+ // safe for optimization.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwap = 1;
+ break;
+ case PPC::LXSDX:
+ case PPC::LXSSPX:
+ // A load of a floating-point value into the high-order half of
+ // a vector register is safe, provided that we introduce a swap
+ // following the load, which will be done by the SUBREG_TO_REG
+ // support. So just mark these as safe.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::STVX:
+ // Non-permuting stores are currently unsafe. We can use special
+ // handling for this in the future. By not marking these as
+ // IsSwap, we ensure computations containing them will be rejected
+ // for now.
+ SwapVector[VecIdx].IsStore = 1;
+ break;
+ case PPC::STXVD2X:
+ case PPC::STXVW4X:
+ // Permuting stores are marked as both store and swap, and are
+ // safe for optimization.
+ SwapVector[VecIdx].IsStore = 1;
+ SwapVector[VecIdx].IsSwap = 1;
+ break;
+ case PPC::COPY:
+ // These are fine provided they are moving between full vector
+ // register classes.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(1).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ // If we have a copy from one scalar floating-point register
+ // to another, we can accept this even if it is a physical
+ // register. The only way this gets involved is if it feeds
+ // a SUBREG_TO_REG, which is handled by introducing a swap.
+ else if (isScalarVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(1).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
+ case PPC::SUBREG_TO_REG: {
+ // These are fine provided they are moving between full vector
+ // register classes. If they are moving from a scalar
+ // floating-point class to a vector class, we can handle those
+ // as well, provided we introduce a swap. It is generally the
+ // case that we will introduce fewer swaps than we remove, but
+ // (FIXME) a cost model could be used. However, introduced
+ // swaps could potentially be CSEd, so this is not trivial.
+ if (isVecReg(MI.getOperand(0).getReg()) &&
+ isVecReg(MI.getOperand(2).getReg()))
+ SwapVector[VecIdx].IsSwappable = 1;
+ else if (isVecReg(MI.getOperand(0).getReg()) &&
+ isScalarVecReg(MI.getOperand(2).getReg())) {
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
+ }
+ break;
+ }
+ case PPC::VSPLTB:
+ case PPC::VSPLTH:
+ case PPC::VSPLTW:
+ // Splats are lane-sensitive, but we can use special handling
+ // to adjust the source lane for the splat; see the SH_SPLAT case
+ // in handleSpecialSwappables().
+ SwapVector[VecIdx].IsSwappable = 1;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
+ break;
+ // The presence of the following lane-sensitive operations in a
+ // web will kill the optimization, at least for now. For these
+ // we do nothing, causing the optimization to fail.
+ // FIXME: Some of these could be permitted with special handling,
+ // and will be phased in as time permits.
+ // FIXME: There is no simple and maintainable way to express a set
+ // of opcodes having a common attribute in TableGen. Should this
+ // change, this is a prime candidate to use such a mechanism.
+ case PPC::INLINEASM:
+ case PPC::EXTRACT_SUBREG:
+ case PPC::INSERT_SUBREG:
+ case PPC::COPY_TO_REGCLASS:
+ case PPC::LVEBX:
+ case PPC::LVEHX:
+ case PPC::LVEWX:
+ case PPC::LVSL:
+ case PPC::LVSR:
+ case PPC::LVXL:
+ case PPC::STVEBX:
+ case PPC::STVEHX:
+ case PPC::STVEWX:
+ case PPC::STVXL:
+ // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
+ // by adding special handling for narrowing copies as well as
+ // widening ones. However, I've experimented with this, and in
+ // practice we currently do not appear to use STXSDX fed by
+ // a narrowing copy from a full vector register. Since I can't
+ // generate any useful test cases, I've left this alone for now.
+ case PPC::STXSDX:
+ case PPC::STXSSPX:
+ case PPC::VCIPHER:
+ case PPC::VCIPHERLAST:
+ case PPC::VMRGHB:
+ case PPC::VMRGHH:
+ case PPC::VMRGHW:
+ case PPC::VMRGLB:
+ case PPC::VMRGLH:
+ case PPC::VMRGLW:
+ case PPC::VMULESB:
+ case PPC::VMULESH:
+ case PPC::VMULESW:
+ case PPC::VMULEUB:
+ case PPC::VMULEUH:
+ case PPC::VMULEUW:
+ case PPC::VMULOSB:
+ case PPC::VMULOSH:
+ case PPC::VMULOSW:
+ case PPC::VMULOUB:
+ case PPC::VMULOUH:
+ case PPC::VMULOUW:
+ case PPC::VNCIPHER:
+ case PPC::VNCIPHERLAST:
+ case PPC::VPERM:
+ case PPC::VPERMXOR:
+ case PPC::VPKPX:
+ case PPC::VPKSHSS:
+ case PPC::VPKSHUS:
+ case PPC::VPKSDSS:
+ case PPC::VPKSDUS:
+ case PPC::VPKSWSS:
+ case PPC::VPKSWUS:
+ case PPC::VPKUDUM:
+ case PPC::VPKUDUS:
+ case PPC::VPKUHUM:
+ case PPC::VPKUHUS:
+ case PPC::VPKUWUM:
+ case PPC::VPKUWUS:
+ case PPC::VPMSUMB:
+ case PPC::VPMSUMD:
+ case PPC::VPMSUMH:
+ case PPC::VPMSUMW:
+ case PPC::VRLB:
+ case PPC::VRLD:
+ case PPC::VRLH:
+ case PPC::VRLW:
+ case PPC::VSBOX:
+ case PPC::VSHASIGMAD:
+ case PPC::VSHASIGMAW:
+ case PPC::VSL:
+ case PPC::VSLDOI:
+ case PPC::VSLO:
+ case PPC::VSR:
+ case PPC::VSRO:
+ case PPC::VSUM2SWS:
+ case PPC::VSUM4SBS:
+ case PPC::VSUM4SHS:
+ case PPC::VSUM4UBS:
+ case PPC::VSUMSWS:
+ case PPC::VUPKHPX:
+ case PPC::VUPKHSB:
+ case PPC::VUPKHSH:
+ case PPC::VUPKHSW:
+ case PPC::VUPKLPX:
+ case PPC::VUPKLSB:
+ case PPC::VUPKLSH:
+ case PPC::VUPKLSW:
+ case PPC::XXMRGHW:
+ case PPC::XXMRGLW:
+ // XXSLDWI could be replaced by a general permute with one of three
+ // permute control vectors (for shift values 1, 2, 3). However,
+ // VPERM has a more restrictive register class.
+ case PPC::XXSLDWI:
+ case PPC::XXSPLTW:
+ break;
+ }
+ }
+ }
+
+ if (RelevantFunction) {
+ DEBUG(dbgs() << "Swap vector when first built\n\n");
+ dumpSwapVector();
+ }
+
+ return RelevantFunction;
+}
+
+// Add an entry to the swap vector and swap map, and make a
+// singleton equivalence class for the entry.
+int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
+ PPCVSXSwapEntry& SwapEntry) {
+ SwapEntry.VSEMI = MI;
+ SwapEntry.VSEId = SwapVector.size();
+ SwapVector.push_back(SwapEntry);
+ EC->insert(SwapEntry.VSEId);
+ SwapMap[MI] = SwapEntry.VSEId;
+ return SwapEntry.VSEId;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg). Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register. If a
+// physical register is encountered, we stop the search and
+// flag the swap entry indicated by VecIdx (the original
+// XXPERMDI) as mentioning a physical register.
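+//
+// For example, given
+//   %vA = SUBREG_TO_REG 1, %fS, sub_64
+//   %vB = COPY %vA
+// a query on %vB chains through the COPY and the SUBREG_TO_REG and
+// returns %fS (the names here are only illustrative).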
+unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
+ unsigned VecIdx) {
+ MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
+ if (!isScalarVecReg(CopySrcReg))
+ SwapVector[VecIdx].MentionsPhysVR = 1;
+ return CopySrcReg;
+ }
+
+ return lookThruCopyLike(CopySrcReg, VecIdx);
+}
+
+// Generate equivalence classes for related computations (webs) by
+// def-use relationships of virtual registers. Mention of a physical
+// register terminates the generation of equivalence classes as this
+// indicates a use of a parameter, definition of a return value, use
+// of a value returned from a call, or definition of a parameter to a
+// call. Computations with physical register mentions are flagged
+// as such so their containing webs will not be optimized.
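+//
+// For example, if %v2 is defined by a swap of %v1 and %v2 is then used by a
+// lane-insensitive vector operation defining %v3, the swap entries for all
+// three defining instructions are unioned into a single equivalence class
+// (web) and analyzed together.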
+void PPCVSXSwapRemoval::formWebs() {
+
+ DEBUG(dbgs() << "\n*** Forming webs for swap removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "\n" << SwapVector[EntryIdx].VSEId << " ");
+ DEBUG(MI->dump());
+
+ // It's sufficient to walk vector uses and join them to their unique
+ // definitions. In addition, check full vector register operands
+ // for physical regs. We exclude partial-vector register operands
+ // because we can handle them if copied to a full vector.
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!isVecReg(Reg) && !isScalarVecReg(Reg))
+ continue;
+
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!(MI->isCopy() && isScalarVecReg(Reg)))
+ SwapVector[EntryIdx].MentionsPhysVR = 1;
+ continue;
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ MachineInstr* DefMI = MRI->getVRegDef(Reg);
+ assert(SwapMap.find(DefMI) != SwapMap.end() &&
+ "Inconsistency: def of vector reg not found in swap map!");
+ int DefIdx = SwapMap[DefMI];
+ (void)EC->unionSets(SwapVector[DefIdx].VSEId,
+ SwapVector[EntryIdx].VSEId);
+
+ DEBUG(dbgs() << format("Unioning %d with %d\n", SwapVector[DefIdx].VSEId,
+ SwapVector[EntryIdx].VSEId));
+ DEBUG(dbgs() << " Def: ");
+ DEBUG(DefMI->dump());
+ }
+ }
+}
+
+// Walk the swap vector entries looking for conditions that prevent their
+// containing computations from being optimized. When such conditions are
+// found, mark the representative of the computation's equivalence class
+// as rejected.
+void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
+
+ DEBUG(dbgs() << "\n*** Rejecting webs for swap removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ // If representative is already rejected, don't waste further time.
+ if (SwapVector[Repr].WebRejected)
+ continue;
+
+ // Reject webs containing mentions of physical or partial registers, or
+ // containing operations that we don't know how to handle in a lane-
+ // permuted region.
+ if (SwapVector[EntryIdx].MentionsPhysVR ||
+ SwapVector[EntryIdx].MentionsPartialVR ||
+ !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for physreg, partial reg, or not "
+ "swap[pable]\n", Repr));
+ DEBUG(dbgs() << " in " << EntryIdx << ": ");
+ DEBUG(SwapVector[EntryIdx].VSEMI->dump());
+ DEBUG(dbgs() << "\n");
+ }
+
+ // Reject webs that contain swapping loads that feed something other
+ // than a swap instruction.
+ else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // We skip debug instructions in the analysis. (Note that debug
+ // location information is still maintained by this optimization
+ // because it remains on the LXVD2X and STXVD2X instructions after
+ // the XXPERMDIs are removed.)
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
+ int UseIdx = SwapMap[&UseMI];
+
+ if (!SwapVector[UseIdx].IsSwap || SwapVector[UseIdx].IsLoad ||
+ SwapVector[UseIdx].IsStore) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for load not feeding swap\n", Repr));
+ DEBUG(dbgs() << " def " << EntryIdx << ": ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << " use " << UseIdx << ": ");
+ DEBUG(UseMI.dump());
+ DEBUG(dbgs() << "\n");
+ }
+ }
+
+ // Reject webs that contain swapping stores that are fed by something
+ // other than a swap instruction.
+ } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned UseReg = MI->getOperand(0).getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(UseReg);
+ int DefIdx = SwapMap[DefMI];
+
+ if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
+ SwapVector[DefIdx].IsStore) {
+
+ SwapVector[Repr].WebRejected = 1;
+
+ DEBUG(dbgs() <<
+ format("Web %d rejected for store not fed by swap\n", Repr));
+ DEBUG(dbgs() << " def " << DefIdx << ": ");
+ DEBUG(DefMI->dump());
+ DEBUG(dbgs() << " use " << EntryIdx << ": ");
+ DEBUG(MI->dump());
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "Swap vector after web analysis:\n\n");
+ dumpSwapVector();
+}
+
+// Walk the swap vector entries looking for swaps fed by permuting loads
+// and swaps that feed permuting stores. If the containing computation
+// has not been marked rejected, mark each such swap for removal.
+// (Removal is delayed in case optimization has disturbed the pattern,
+// such that multiple loads feed the same swap, etc.)
+void PPCVSXSwapRemoval::markSwapsForRemoval() {
+
+ DEBUG(dbgs() << "\n*** Marking swaps for removal ***\n\n");
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
+ int UseIdx = SwapMap[&UseMI];
+ SwapVector[UseIdx].WillRemove = 1;
+
+ DEBUG(dbgs() << "Marking swap fed by load for removal: ");
+ DEBUG(UseMI.dump());
+ }
+ }
+
+ } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected) {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned UseReg = MI->getOperand(0).getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(UseReg);
+ int DefIdx = SwapMap[DefMI];
+ SwapVector[DefIdx].WillRemove = 1;
+
+ DEBUG(dbgs() << "Marking swap feeding store for removal: ");
+ DEBUG(DefMI->dump());
+ }
+
+ } else if (SwapVector[EntryIdx].IsSwappable &&
+ SwapVector[EntryIdx].SpecialHandling != 0) {
+ int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+ if (!SwapVector[Repr].WebRejected)
+ handleSpecialSwappables(EntryIdx);
+ }
+ }
+}
+
+// Create an xxswapd instruction and insert it prior to the given point.
+// MI is used to determine basic block and debug loc information.
+// FIXME: When inserting a swap, we should check whether SrcReg is
+// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2; If so,
+// then instead we should generate a copy from Reg to DstReg.
+void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg) {
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), DstReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .addImm(2);
+}
+
+// The identified swap entry requires special handling to allow its
+// containing computation to be optimized. Perform that handling
+// here.
+// FIXME: Additional opportunities will be phased in with subsequent
+// patches.
+void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
+ switch (SwapVector[EntryIdx].SpecialHandling) {
+
+ default:
+ llvm_unreachable("Unexpected special handling type");
+
+ // For splats based on an index into a vector, add N/2 modulo N
+ // to the index, where N is the number of vector elements.
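+ // For example, VSPLTW selects among N = 4 word lanes, so a source lane
+ // of 1 becomes (1 + 4/2) % 4 = 3 once the doubleword halves are swapped.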
+ case SHValues::SH_SPLAT: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ unsigned NElts;
+
+ DEBUG(dbgs() << "Changing splat: ");
+ DEBUG(MI->dump());
+
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected splat opcode");
+ case PPC::VSPLTB: NElts = 16; break;
+ case PPC::VSPLTH: NElts = 8; break;
+ case PPC::VSPLTW: NElts = 4; break;
+ }
+
+ unsigned EltNo = MI->getOperand(1).getImm();
+ EltNo = (EltNo + NElts / 2) % NElts;
+ MI->getOperand(1).setImm(EltNo);
+
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+ break;
+ }
+
+ // For an XXPERMDI that isn't handled otherwise, we need to
+ // reverse the order of the operands. If the selector operand
+ // has a value of 0 or 3, we need to change it to 3 or 0,
+ // respectively. Otherwise we should leave it alone. (This
+ // is equivalent to reversing the two bits of the selector
+ // operand and complementing the result.)
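+ // For example, selector 0 (0b00) reversed is 0b00 and complemented is
+ // 0b11 = 3, while selector 1 (0b01) reversed is 0b10 and complemented is
+ // 0b01 = 1, i.e. unchanged, which matches the rule above.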
+ case SHValues::SH_XXPERMDI: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing XXPERMDI: ");
+ DEBUG(MI->dump());
+
+ unsigned Selector = MI->getOperand(3).getImm();
+ if (Selector == 0 || Selector == 3)
+ Selector = 3 - Selector;
+ MI->getOperand(3).setImm(Selector);
+
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+ break;
+ }
+
+ // For a copy from a scalar floating-point register to a vector
+ // register, removing swaps will leave the copied value in the
+ // wrong lane. Insert a swap following the copy to fix this.
+ case SHValues::SH_COPYWIDEN: {
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+
+ DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
+ DEBUG(MI->dump());
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+
+ MI->getOperand(0).setReg(NewVReg);
+ DEBUG(dbgs() << " Into: ");
+ DEBUG(MI->dump());
+
+ auto InsertPoint = ++MachineBasicBlock::iterator(MI);
+
+ // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
+ // is copying to a VRRC, we need to be careful to avoid a register
+ // assignment problem. In this case we must copy from VRRC to VSRC
+ // prior to the swap, and from VSRC to VRRC following the swap.
+ // Coalescing will usually remove all this mess.
+ if (DstRC == &PPC::VRRCRegClass) {
+ unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), VSRCTmp1)
+ .addReg(NewVReg);
+ DEBUG(std::prev(InsertPoint)->dump());
+
+ insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
+ DEBUG(std::prev(InsertPoint)->dump());
+
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::COPY), DstReg)
+ .addReg(VSRCTmp2);
+ DEBUG(std::prev(InsertPoint)->dump());
+
+ } else {
+ insertSwap(MI, InsertPoint, DstReg, NewVReg);
+ DEBUG(std::prev(InsertPoint)->dump());
+ }
+ break;
+ }
+ }
+}
+
+// Walk the swap vector and replace each entry marked for removal with
+// a copy operation.
+bool PPCVSXSwapRemoval::removeSwaps() {
+
+ DEBUG(dbgs() << "\n*** Removing swaps ***\n\n");
+
+ bool Changed = false;
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+ if (SwapVector[EntryIdx].WillRemove) {
+ Changed = true;
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ MachineBasicBlock *MBB = MI->getParent();
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+ .addOperand(MI->getOperand(1));
+
+ DEBUG(dbgs() << format("Replaced %d with copy: ",
+ SwapVector[EntryIdx].VSEId));
+ DEBUG(MI->dump());
+
+ MI->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+// For debug purposes, dump the contents of the swap vector.
+void PPCVSXSwapRemoval::dumpSwapVector() {
+
+ for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
+
+ MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+ int ID = SwapVector[EntryIdx].VSEId;
+
+ DEBUG(dbgs() << format("%6d", ID));
+ DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID)));
+ DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber()));
+ DEBUG(dbgs() << format(" %14s ", TII->getName(MI->getOpcode())));
+
+ if (SwapVector[EntryIdx].IsLoad)
+ DEBUG(dbgs() << "load ");
+ if (SwapVector[EntryIdx].IsStore)
+ DEBUG(dbgs() << "store ");
+ if (SwapVector[EntryIdx].IsSwap)
+ DEBUG(dbgs() << "swap ");
+ if (SwapVector[EntryIdx].MentionsPhysVR)
+ DEBUG(dbgs() << "physreg ");
+ if (SwapVector[EntryIdx].MentionsPartialVR)
+ DEBUG(dbgs() << "partialreg ");
+
+ if (SwapVector[EntryIdx].IsSwappable) {
+ DEBUG(dbgs() << "swappable ");
+ switch (SwapVector[EntryIdx].SpecialHandling) {
+ default:
+ DEBUG(dbgs() << "special:**unknown**");
+ break;
+ case SH_NONE:
+ break;
+ case SH_EXTRACT:
+ DEBUG(dbgs() << "special:extract ");
+ break;
+ case SH_INSERT:
+ DEBUG(dbgs() << "special:insert ");
+ break;
+ case SH_NOSWAP_LD:
+ DEBUG(dbgs() << "special:load ");
+ break;
+ case SH_NOSWAP_ST:
+ DEBUG(dbgs() << "special:store ");
+ break;
+ case SH_SPLAT:
+ DEBUG(dbgs() << "special:splat ");
+ break;
+ case SH_XXPERMDI:
+ DEBUG(dbgs() << "special:xxpermdi ");
+ break;
+ case SH_COPYWIDEN:
+ DEBUG(dbgs() << "special:copywiden ");
+ break;
+ }
+ }
+
+ if (SwapVector[EntryIdx].WebRejected)
+ DEBUG(dbgs() << "rejected ");
+ if (SwapVector[EntryIdx].WillRemove)
+ DEBUG(dbgs() << "remove ");
+
+ DEBUG(dbgs() << "\n");
+
+ // Suppress unused-variable warnings in no-asserts builds.
+ (void)MI;
+ (void)ID;
+ }
+
+ DEBUG(dbgs() << "\n");
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
+ "PowerPC VSX Swap Removal", false, false)
+INITIALIZE_PASS_END(PPCVSXSwapRemoval, DEBUG_TYPE,
+ "PowerPC VSX Swap Removal", false, false)
+
+char PPCVSXSwapRemoval::ID = 0;
+FunctionPass*
+llvm::createPPCVSXSwapRemovalPass() { return new PPCVSXSwapRemoval(); }
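+
+// Note: constructing the pass here does not schedule it; the target is
+// expected to add it to its pass pipeline (e.g. from PPCTargetMachine's
+// pre-register-allocation hook), which happens outside this file.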
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
new file mode 100644
index 0000000..5b2fe19
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePPC32Target, llvm::ThePPC64Target, llvm::ThePPC64LETarget;
+
+extern "C" void LLVMInitializePowerPCTargetInfo() {
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true>
+ X(ThePPC32Target, "ppc32", "PowerPC 32");
+
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
+ Y(ThePPC64Target, "ppc64", "PowerPC 64");
+
+ RegisterTarget<Triple::ppc64le, /*HasJIT=*/true>
+ Z(ThePPC64LETarget, "ppc64le", "PowerPC 64 LE");
+}